vendor/llvm/llvm-r72995

author: Ed Schouten <ed@FreeBSD.org> 2009-06-06 08:20:29 +0000
committer: Ed Schouten <ed@FreeBSD.org> 2009-06-06 08:20:29 +0000
commit: f4fe016fa15f703fe9c1b932d1e81e2c718521db (patch)
tree: 8a1bbd1a5b838080d31e5c93a1817006b8c62318
parent: 68eb509bdc5c7007520d5231cd92de28106236df (diff)
372 files changed, 4347 insertions, 2884 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ba63484d8c0a..81333987bed8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,13 +58,22 @@ endif( MSVC )
 option(LLVM_ENABLE_THREADS "Use threads if available." ON)
 
 if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
-  option(LLVM_ENABLE_ASSERTS "Enable asserts" OFF)
+  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF)
 else()
-  option(LLVM_ENABLE_ASSERTS "Enable asserts" ON)
+  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
 endif()
 
-if( LLVM_ENABLE_ASSERTS )
-  add_definitions( -D_DEBUG -UNDEBUG )
+if( LLVM_ENABLE_ASSERTIONS )
+  add_definitions( -D_DEBUG )
+  # On Release builds cmake automatically defines NDEBUG, so we
+  # explicitly undefine it:
+  if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    add_definitions( -UNDEBUG )
+  endif()
+else()
+  if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    add_definitions( -DNDEBUG )
+  endif()
 endif()
 
 if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
@@ -81,6 +90,24 @@ endforeach(c)
 
 set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
 
+# The USE_EXPLICIT_DEPENDENCIES variable will be TRUE to indicate that
+# we should use the library dependencies explicitly specified in the
+# CMakeLists.txt files rather than those determined by
+# llvm-config. This value must be true for non-make and IDE
+# generators.
+if (MSVC_IDE)
+  set(DEFAULT_USE_EXPLICIT_DEPENDENCIES ON)
+elseif (XCODE)
+  set(DEFAULT_USE_EXPLICIT_DEPENDENCIES ON)
+else ()
+ set(DEFAULT_USE_EXPLICIT_DEPENDENCIES OFF)
+endif ()
+
+option(USE_EXPLICIT_DEPENDENCIES 
+  "Use explicit dependencies instead of llvm-config" 
+  ${DEFAULT_USE_EXPLICIT_DEPENDENCIES})
+mark_as_advanced(USE_EXPLICIT_DEPENDENCIES)
+
 # Add path for custom modules
 set(CMAKE_MODULE_PATH
   ${CMAKE_MODULE_PATH}
@@ -123,11 +150,13 @@ include(config-ix)
 
 option(LLVM_ENABLE_PIC "Build Position-Independent Code" OFF)
 
+set(ENABLE_PIC 0)
 if( LLVM_ENABLE_PIC )
   if( SUPPORTS_FPIC_FLAG )
     message(STATUS "Building with -fPIC")
     add_llvm_definitions(-fPIC)
-  else( SUPPORTS_FPIC_FLAG )
+    set(ENABLE_PIC 1)
+ else( SUPPORTS_FPIC_FLAG )
     message(STATUS "Warning: -fPIC not supported.")
   endif()
 endif()
diff --git a/Makefile.rules b/Makefile.rules
index caa3335aa359..4a77bf536dff 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -1066,7 +1066,7 @@ all-local:: $(LibName.O)
 
 $(LibName.O): $(ObjectsO) $(LibDir)/.dir
 	$(Echo) Linking $(BuildMode) Object Library $(notdir $@)
-	$(Verb) $(Relink) -Wl,-r -nodefaultlibs -nostdlib -nostartfiles -o $@ $(ObjectsO)
+	$(Verb) $(Relink) -r -nodefaultlibs -nostdlib -nostartfiles -o $@ $(ObjectsO)
 
 clean-local::
 ifneq ($(strip $(LibName.O)),)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 7f625d4bed15..64b78a49eb6d 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -279,7 +279,7 @@ dnl===-----------------------------------------------------------------------===
 
 dnl --enable-optimized : check whether they want to do an optimized build:
 AC_ARG_ENABLE(optimized, AS_HELP_STRING(
- [--enable-optimized,Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
+ --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
 if test ${enableval} = "no" ; then
   AC_SUBST(ENABLE_OPTIMIZED,[[]])
 else
@@ -288,7 +288,7 @@ fi
 
 dnl --enable-profiling : check whether they want to do a profile build:
 AC_ARG_ENABLE(profiling, AS_HELP_STRING(
- [--enable-profiling,Compile with profiling enabled (default is NO)]),,enableval="no")
+ --enable-profiling,[Compile with profiling enabled (default is NO)]),,enableval="no")
 if test ${enableval} = "no" ; then
   AC_SUBST(ENABLE_PROFILING,[[]])
 else
@@ -297,7 +297,7 @@ fi
 
 dnl --enable-assertions : check whether they want to turn on assertions or not:
 AC_ARG_ENABLE(assertions,AS_HELP_STRING(
-  [--enable-assertions,Compile with assertion checks enabled (default is YES)]),, enableval="yes")
+  --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
 if test ${enableval} = "yes" ; then
   AC_SUBST(DISABLE_ASSERTIONS,[[]])
 else
@@ -306,7 +306,7 @@ fi
 
 dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks:
 AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING(
-  [--enable-expensive-checks,Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
+  --enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
 if test ${enableval} = "yes" ; then
   AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[ENABLE_EXPENSIVE_CHECKS=1]])
   AC_SUBST(EXPENSIVE_CHECKS,[[yes]])
@@ -317,7 +317,7 @@ fi
 
 dnl --enable-debug-runtime : should runtime libraries have debug symbols?
 AC_ARG_ENABLE(debug-runtime,
-   AS_HELP_STRING([--enable-debug-runtime,Build runtime libs with debug symbols (default is NO)]),,enableval=no)
+   AS_HELP_STRING(--enable-debug-runtime,[Build runtime libs with debug symbols (default is NO)]),,enableval=no)
 if test ${enableval} = "no" ; then
   AC_SUBST(DEBUG_RUNTIME,[[]])
 else
@@ -553,6 +553,16 @@ if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
   fi
 fi
 
+dnl --enable-libffi : check whether the user wants to turn off libffi:
+AC_ARG_ENABLE(libffi,AS_HELP_STRING(
+  --enable-libffi,[Check for the presence of libffi (default is YES)]),,
+  enableval=yes)
+case "$enableval" in
+  yes) llvm_cv_enable_libffi="yes" ;;
+  no)  llvm_cv_enable_libffi="no"  ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-libffi. Use "yes" or "no"]) ;;
+esac
+
 dnl===-----------------------------------------------------------------------===
 dnl===
 dnl=== SECTION 4: Check for programs we need and that they are the right version
@@ -769,9 +779,11 @@ AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1],
                AC_MSG_WARN([dlopen() not found - disabling plugin support]))
 
 dnl libffi is optional; used to call external functions from the interpreter
-AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
-               [Define if libffi is available on this platform.]),
-               AC_MSG_WARN([libffi not found - disabling external calls from interpreter]))
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+  AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
+                 [Define if libffi is available on this platform.]),
+                 AC_MSG_WARN([libffi not found - disabling external calls from interpreter]))
+fi
 
 dnl mallinfo is optional; the code can compile (minus features) without it
 AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
@@ -836,7 +848,9 @@ else
 fi
 
 dnl Try to find ffi.h.
-AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+  AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
+fi
 
 dnl===-----------------------------------------------------------------------===
 dnl===
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 0c903a190292..c18ac44172f9 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -1,6 +1,7 @@
 
 # include checks
 include(CheckIncludeFile)
+check_include_file(alloca.h HAVE_ALLOCA_H)
 check_include_file(argz.h HAVE_ARGZ_H)
 check_include_file(assert.h HAVE_ASSERT_H)
 check_include_file(dirent.h HAVE_DIRENT_H)
@@ -34,6 +35,7 @@ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
 check_include_file(sys/stat.h HAVE_SYS_STAT_H)
 check_include_file(sys/time.h HAVE_SYS_TIME_H)
 check_include_file(sys/types.h HAVE_SYS_TYPES_H)
+check_include_file(sys/wait.h HAVE_SYS_WAIT_H)
 check_include_file(termios.h HAVE_TERMIOS_H)
 check_include_file(unistd.h HAVE_UNISTD_H)
 check_include_file(utime.h HAVE_UTIME_H)
@@ -47,6 +49,7 @@ check_library_exists(dl dlopen "" HAVE_LIBDL)
 # function checks
 include(CheckSymbolExists)
 include(CheckFunctionExists)
+check_symbol_exists(alloca alloca.h HAVE_ALLOCA)
 check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
 check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE)
 check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT)
@@ -58,6 +61,8 @@ check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
 check_symbol_exists(ceilf math.h HAVE_CEILF)
 check_symbol_exists(floorf math.h HAVE_FLOORF)
 check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
+check_symbol_exists(malloc_zone_statistics malloc/malloc.h
+                    HAVE_MALLOC_ZONE_STATISTICS)
 check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
 check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
 
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index c531298a30f0..a21ed20d3bae 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -26,11 +26,11 @@ macro(add_llvm_executable name)
   if( LLVM_LINK_COMPONENTS )
     llvm_config(${name} ${LLVM_LINK_COMPONENTS})
   endif( LLVM_LINK_COMPONENTS )
-  if( MSVC )
+  if( USE_EXPLICIT_DEPENDENCIES )
     target_link_libraries(${name} ${llvm_libs})
-  else( MSVC )
+  else( )
     add_dependencies(${name} llvm-config.target)
-  endif( MSVC )
+  endif( )
   get_system_libs(llvm_system_libs)
   if( llvm_system_libs )
     target_link_libraries(${name} ${llvm_system_libs})
diff --git a/cmake/modules/AddPartiallyLinkedObject.cmake b/cmake/modules/AddPartiallyLinkedObject.cmake
index c9d96de1b9a8..d20666dd79fb 100755
--- a/cmake/modules/AddPartiallyLinkedObject.cmake
+++ b/cmake/modules/AddPartiallyLinkedObject.cmake
@@ -1,18 +1,18 @@
 include(LLVMProcessSources)
 
 macro(target_name_of_partially_linked_object lib var)
-  if( MSVC )
+  if( USE_EXPLICIT_DEPENDENCIES )
     set(${var} ${lib})
-  else( MSVC )
+  else( )
     set(${var} ${lib}_pll)
-  endif( MSVC )
+  endif( )
 endmacro(target_name_of_partially_linked_object lib var)
 
 
 macro(add_partially_linked_object lib)
-  if( MSVC )
+  if( USE_EXPLICIT_DEPENDENCIES )
     add_llvm_library( ${lib} ${ARGN})
-  else( MSVC )
+  else( )
     set(pll ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${lib}.o)
     set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/temp_lib)
     set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/temp_lib)
@@ -36,7 +36,7 @@ macro(add_partially_linked_object lib)
     add_custom_target(${tnplo} ALL DEPENDS ${pll})
     set( llvm_libs ${llvm_libs} ${pll} PARENT_SCOPE)
     set( llvm_lib_targets ${llvm_lib_targets} ${tnplo} PARENT_SCOPE )
-  endif( MSVC )
+  endif( )
   install(FILES ${pll}
     DESTINATION lib)
 endmacro(add_partially_linked_object lib)
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake
index 9776fb05d2a6..fd9249518747 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVMConfig.cmake
@@ -21,36 +21,40 @@ endfunction(get_system_libs)
 
 macro(llvm_config executable)
   # extra args is the list of link components.
-  if( MSVC )
-    msvc_llvm_config(${executable} ${ARGN})
-  else( MSVC )
+  if( USE_EXPLICIT_DEPENDENCIES )
+    explicit_llvm_config(${executable} ${ARGN})
+  else( )
     nix_llvm_config(${executable} ${ARGN})
-  endif( MSVC )
+  endif( )
 endmacro(llvm_config)
 
 
-function(msvc_llvm_config executable)
+function(explicit_llvm_config executable)
   set( link_components ${ARGN} )
-  if( CMAKE_CL_64 )
-    set(include_lflag "/INCLUDE:")
-  else( CMAKE_CL_64 )
-    set(include_lflag "/INCLUDE:_")
-  endif()
-  foreach(c ${link_components})
-    if( c STREQUAL "jit" )
-      set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule")
-    endif( c STREQUAL "jit" )
-    list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
-    if( NOT idx LESS 0 )
-      set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule")
-      list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx)
+
+  set(lfgs)
+  if (MSVC)
+    if( CMAKE_CL_64 )
+      set(include_lflag "/INCLUDE:")
+    else( CMAKE_CL_64 )
+      set(include_lflag "/INCLUDE:_")
+    endif()
+    foreach(c ${link_components})
+      if( c STREQUAL "jit" )
+        set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule")
+      endif( c STREQUAL "jit" )
+      list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
       if( NOT idx LESS 0 )
-	set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink")
+        set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule")
+        list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx)
+        if( NOT idx LESS 0 )
+	  set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink")
+        endif()
       endif()
-    endif()
-  endforeach(c)
+    endforeach(c)
+  endif ()
 
-  msvc_map_components_to_libraries(LIBRARIES ${link_components})
+  explicit_map_components_to_libraries(LIBRARIES ${link_components})
   target_link_libraries(${executable} ${LIBRARIES})
 
   if( lfgs )
@@ -58,10 +62,10 @@ function(msvc_llvm_config executable)
       PROPERTIES
       LINK_FLAGS ${lfgs})
   endif()
-endfunction(msvc_llvm_config)
+endfunction(explicit_llvm_config)
 
 
-function(msvc_map_components_to_libraries out_libs)
+function(explicit_map_components_to_libraries out_libs)
   set( link_components ${ARGN} )
   foreach(c ${link_components})
     # add codegen/asmprinter
@@ -121,7 +125,7 @@ function(msvc_map_components_to_libraries out_libs)
   endwhile( ${curr_idx} LESS ${lst_size} )
   list(REMOVE_DUPLICATES result)
   set(${out_libs} ${result} PARENT_SCOPE)
-endfunction(msvc_map_components_to_libraries)
+endfunction(explicit_map_components_to_libraries)
 
 
 macro(nix_llvm_config executable)
diff --git a/configure b/configure
index 1179524853f1..9a30c297cd37 100755
--- a/configure
+++ b/configure
@@ -1533,12 +1533,15 @@ if test -n "$ac_init_help"; then
 Optional Features:
   --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
-  --enable-optimized
-  --enable-profiling
-  --enable-assertions
+  --enable-optimized      Compile with optimizations enabled (default is NO)
+  --enable-profiling      Compile with profiling enabled (default is NO)
+  --enable-assertions     Compile with assertion checks enabled (default is
+                          YES)
   --enable-expensive-checks
-
-  --enable-debug-runtime
+                          Compile with expensive debug checks enabled (default
+                          is NO)
+  --enable-debug-runtime  Build runtime libs with debug symbols (default is
+                          NO)
   --enable-jit            Enable Just In Time Compiling (default is YES)
   --enable-doxygen        Build doxygen documentation (default is NO)
   --enable-threads        Use threads if available (default is YES)
@@ -1550,6 +1553,7 @@ Optional Features:
                           %a (default is YES)
   --enable-bindings       Build specific language bindings:
                           all,auto,none,{binding-name} (default=auto)
+  --enable-libffi         Check for the presence of libffi (default is YES)
   --enable-ltdl-install   install libltdl
   --enable-shared[=PKGS]  build shared libraries
                           [default=yes]
@@ -5111,6 +5115,21 @@ echo "$as_me: error: Invalid path to directory containing plugin-api.h." >&2;}
   fi
 fi
 
+# Check whether --enable-libffi was given.
+if test "${enable_libffi+set}" = set; then
+  enableval=$enable_libffi;
+else
+  enableval=yes
+fi
+
+case "$enableval" in
+  yes) llvm_cv_enable_libffi="yes" ;;
+  no)  llvm_cv_enable_libffi="no"  ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
 
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
@@ -10575,7 +10594,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10578 "configure"
+#line 10597 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12719,7 +12738,7 @@ ia64-*-hpux*)
   ;;
 *-*-irix6*)
   # Find out which ABI we are using.
-  echo '#line 12722 "configure"' > conftest.$ac_ext
+  echo '#line 12741 "configure"' > conftest.$ac_ext
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
   ac_status=$?
@@ -14437,11 +14456,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14440: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14459: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:14444: \$? = $ac_status" >&5
+   echo "$as_me:14463: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -14705,11 +14724,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14708: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14727: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:14712: \$? = $ac_status" >&5
+   echo "$as_me:14731: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -14809,11 +14828,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14812: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14831: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:14816: \$? = $ac_status" >&5
+   echo "$as_me:14835: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -17261,7 +17280,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 17264 "configure"
+#line 17283 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17361,7 +17380,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 17364 "configure"
+#line 17383 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19729,11 +19748,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:19732: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:19751: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:19736: \$? = $ac_status" >&5
+   echo "$as_me:19755: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -19833,11 +19852,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:19836: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:19855: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:19840: \$? = $ac_status" >&5
+   echo "$as_me:19859: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -21403,11 +21422,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:21406: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:21425: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:21410: \$? = $ac_status" >&5
+   echo "$as_me:21429: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -21507,11 +21526,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:21510: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:21529: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:21514: \$? = $ac_status" >&5
+   echo "$as_me:21533: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -23742,11 +23761,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:23745: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:23764: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:23749: \$? = $ac_status" >&5
+   echo "$as_me:23768: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -24010,11 +24029,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:24013: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:24032: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:24017: \$? = $ac_status" >&5
+   echo "$as_me:24036: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -24114,11 +24133,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:24117: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:24136: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:24121: \$? = $ac_status" >&5
+   echo "$as_me:24140: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -27473,7 +27492,8 @@ echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;}
 fi
 
 
-{ echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+  { echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
 echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; }
 if test "${ac_cv_search_ffi_call+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -27579,6 +27599,7 @@ else
 echo "$as_me: WARNING: libffi not found - disabling external calls from interpreter" >&2;}
 fi
 
+fi
 
 { echo "$as_me:$LINENO: checking for library containing mallinfo" >&5
 echo $ECHO_N "checking for library containing mallinfo... $ECHO_C" >&6; }
@@ -29824,6 +29845,7 @@ else
 
 fi
 
+if test "$llvm_cv_enable_libffi" = "yes" ; then
 
 
 for ac_header in ffi.h ffi/ffi.h
@@ -29994,6 +30016,7 @@ fi
 
 done
 
+fi
 
 
 
diff --git a/docs/CMake.html b/docs/CMake.html
index b329ca46d62e..ac3b57a84893 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -248,8 +248,8 @@
   <dt><b>LLVM_ENABLE_THREADS</b>:BOOL</dt>
   <dd>Build with threads support, if available. Defaults to ON.</dd>
 
-  <dt><b>LLVM_ENABLE_ASSERTS</b>:BOOL</dt>
-  <dd>Enables code asserts. Defaults to ON if and only if
+  <dt><b>LLVM_ENABLE_ASSERTIONS</b>:BOOL</dt>
+  <dd>Enables code assertions. Defaults to ON if and only if
     CMAKE_BUILD_TYPE is <i>Release</i>.</dd>
 
   <dt><b>LLVM_ENABLE_PIC</b>:BOOL</dt>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 6be5277efd16..17a50aca6e99 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -153,12 +153,21 @@ $ llvm-gcc -use-gold-plugin a.o b.o -o main # &lt;-- link with LLVMgold plugin
     <pre class="doc_code">
 export CC="$PREFIX/bin/llvm-gcc -use-gold-plugin"
 export CXX="$PREFIX/bin/llvm-g++ -use-gold-plugin"
-export AR="$PREFIX/bin/ar --plugin libLLVMgold.so"
-export NM="$PREFIX/bin/nm --plugin libLLVMgold.so"
+export AR="$PREFIX/bin/ar"
+export NM="$PREFIX/bin/nm"
 export RANLIB=/bin/true #ranlib is not needed, and doesn't support .bc files in .a
 export CFLAGS="-O4"
      </pre>
      </li>
+     <li>Or you can just set your path:
+    <pre class="doc_code">
+export PATH="$PREFIX/bin:$PATH"
+export CC="llvm-gcc -use-gold-plugin"
+export CXX="llvm-g++ -use-gold-plugin"
+export RANLIB=/bin/true
+export CFLAGS="-O4"
+     </pre>
+     </li>
      <li>Configure &amp; build the project as usual: <tt>./configure &amp;&amp; make &amp;&amp; make check</tt> </li>
    </ul>
    <p> The environment variable settings may work for non-autotooled projects
diff --git a/docs/LangRef.html b/docs/LangRef.html
index f0a171be8af5..89d4f936569f 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -89,8 +89,11 @@
       <li><a href="#binaryops">Binary Operations</a>
         <ol>
           <li><a href="#i_add">'<tt>add</tt>' Instruction</a></li>
+          <li><a href="#i_fadd">'<tt>fadd</tt>' Instruction</a></li>
           <li><a href="#i_sub">'<tt>sub</tt>' Instruction</a></li>
+          <li><a href="#i_fsub">'<tt>fsub</tt>' Instruction</a></li>
           <li><a href="#i_mul">'<tt>mul</tt>' Instruction</a></li>
+          <li><a href="#i_fmul">'<tt>fmul</tt>' Instruction</a></li>
           <li><a href="#i_udiv">'<tt>udiv</tt>' Instruction</a></li>
           <li><a href="#i_sdiv">'<tt>sdiv</tt>' Instruction</a></li>
           <li><a href="#i_fdiv">'<tt>fdiv</tt>' Instruction</a></li>
@@ -2503,16 +2506,15 @@ The result value has the same type as its operands.</p>
 <h5>Arguments:</h5>
 
 <p>The two arguments to the '<tt>add</tt>' instruction must be <a
- href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>, or
- <a href="#t_vector">vector</a> values. Both arguments must have identical
- types.</p>
+ href="#t_integer">integer</a> or
+ <a href="#t_vector">vector</a> of integer values. Both arguments must
+ have identical types.</p>
 
 <h5>Semantics:</h5>
 
-<p>The value produced is the integer or floating point sum of the two
-operands.</p>
+<p>The value produced is the integer sum of the two operands.</p>
 
-<p>If an integer sum has unsigned overflow, the result returned is the
+<p>If the sum has unsigned overflow, the result returned is the
 mathematical result modulo 2<sup>n</sup>, where n is the bit width of
 the result.</p>
 
@@ -2527,6 +2529,39 @@ instruction is appropriate for both signed and unsigned integers.</p>
 </div>
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
+  <a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+  &lt;result&gt; = fadd &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>fadd</tt>' instruction returns the sum of its two operands.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fadd</tt>' instruction must be
+<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
+floating point values. Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point sum of the two operands.</p>
+
+<h5>Example:</h5>
+
+<pre>
+  &lt;result&gt; = fadd float 4.0, %var          <i>; yields {float}:result = 4.0 + %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
    <a name="i_sub">'<tt>sub</tt>' Instruction</a>
 </div>
 
@@ -2550,16 +2585,14 @@ representations.</p>
 <h5>Arguments:</h5>
 
 <p>The two arguments to the '<tt>sub</tt>' instruction must be <a
- href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>,
- or <a href="#t_vector">vector</a> values.  Both arguments must have identical
- types.</p>
+ href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
+ integer values.  Both arguments must have identical types.</p>
 
 <h5>Semantics:</h5>
 
-<p>The value produced is the integer or floating point difference of
-the two operands.</p>
+<p>The value produced is the integer difference of the two operands.</p>
 
-<p>If an integer difference has unsigned overflow, the result returned is the
+<p>If the difference has unsigned overflow, the result returned is the
 mathematical result modulo 2<sup>n</sup>, where n is the bit width of
 the result.</p>
 
@@ -2575,6 +2608,45 @@ instruction is appropriate for both signed and unsigned integers.</p>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
+   <a name="i_fsub">'<tt>fsub</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+  &lt;result&gt; = fsub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>fsub</tt>' instruction returns the difference of its two
+operands.</p>
+
+<p>Note that the '<tt>fsub</tt>' instruction is used to represent the
+'<tt>fneg</tt>' instruction present in most other intermediate
+representations.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fsub</tt>' instruction must be <a
+ <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
+ of floating point values.  Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point difference of the two operands.</p>
+
+<h5>Example:</h5>
+<pre>
+  &lt;result&gt; = fsub float 4.0, %var           <i>; yields {float}:result = 4.0 - %var</i>
+  &lt;result&gt; = fsub float -0.0, %val          <i>; yields {float}:result = -%var</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
   <a name="i_mul">'<tt>mul</tt>' Instruction</a>
 </div>
 
@@ -2590,16 +2662,14 @@ operands.</p>
 <h5>Arguments:</h5>
 
 <p>The two arguments to the '<tt>mul</tt>' instruction must be <a
-href="#t_integer">integer</a>, <a href="#t_floating">floating point</a>,
-or <a href="#t_vector">vector</a> values.  Both arguments must have identical
-types.</p>
+href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
+values.  Both arguments must have identical types.</p>
  
 <h5>Semantics:</h5>
 
-<p>The value produced is the integer or floating point product of the
-two operands.</p>
+<p>The value produced is the integer product of the two operands.</p>
 
-<p>If the result of an integer multiplication has unsigned overflow,
+<p>If the result of the multiplication has unsigned overflow,
 the result returned is the mathematical result modulo 
 2<sup>n</sup>, where n is the bit width of the result.</p>
 <p>Because LLVM integers use a two's complement representation, and the
@@ -2614,6 +2684,35 @@ width of the full product.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="i_fmul">'<tt>fmul</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>  &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The  '<tt>fmul</tt>' instruction returns the product of its two
+operands.</p>
+
+<h5>Arguments:</h5>
+
+<p>The two arguments to the '<tt>fmul</tt>' instruction must be
+<a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a>
+of floating point values.  Both arguments must have identical types.</p>
+
+<h5>Semantics:</h5>
+
+<p>The value produced is the floating point product of the two operands.</p>
+
+<h5>Example:</h5>
+<pre>  &lt;result&gt; = fmul float 4.0, %var          <i>; yields {float}:result = 4.0 * %var</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection"> <a name="i_udiv">'<tt>udiv</tt>' Instruction
 </a></div>
 <div class="doc_text">
@@ -7110,7 +7209,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-06-03 12:20:10 +0200 (Wed, 03 Jun 2009) $
+  Last modified: $Date: 2009-06-05 00:49:04 +0200 (Fri, 05 Jun 2009) $
 </address>
 
 </body>
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 819861769084..5d92fc5af013 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -19,6 +19,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#define LTO_API_VERSION 3
+
 typedef enum {
     LTO_SYMBOL_ALIGNMENT_MASK         = 0x0000001F,    /* log2 of alignment */
     LTO_SYMBOL_PERMISSIONS_MASK       = 0x000000E0,    
@@ -208,6 +210,14 @@ lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path);
 
 
 /**
+ * Sets the location of the assembler tool to run. If not set, libLTO
+ * will use gcc to invoke the assembler.
+ */
+extern void
+lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path);
+
+
+/**
  * Adds to a list of all global symbols that must exist in the final
  * generated code.  If a function is not listed, it might be
  * inlined into every usage and optimized away.
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index b405f5b71ed7..347e239d8ea7 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -270,12 +270,17 @@ protected:
         NewBBIDom = PredBlocks[i];
         break;
       }
-    assert(i != PredBlocks.size() && "No reachable preds?");
+
+    // It's possible that none of the predecessors of NewBB are reachable;
+    // in that case, NewBB itself is unreachable, so nothing needs to be
+    // changed.
+    if (!NewBBIDom)
+      return;
+
     for (i = i + 1; i < PredBlocks.size(); ++i) {
       if (DT.isReachableFromEntry(PredBlocks[i]))
         NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
     }
-    assert(NewBBIDom && "No immediate dominator found??");
 
     // Create the new dominator tree node... and set the idom of NewBB.
     DomTreeNodeBase<NodeT> *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom);
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 7e0de47dc4f3..b40fbf06f9c8 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -60,12 +60,7 @@ namespace llvm {
     /// canonical induction variable of the specified type for the specified
     /// loop (inserting one if there is none).  A canonical induction variable
     /// starts at zero and steps by one on each iteration.
-    Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty){
-      assert(Ty->isInteger() && "Can only insert integer induction variables!");
-      SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
-                                      SE.getIntegerSCEV(1, Ty), L);
-      return expand(H);
-    }
+    Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty);
 
     /// addInsertedValue - Remember the specified instruction as being the
     /// canonical form for the specified SCEV.
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 972dbfa518df..a594e3223d4f 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -54,13 +54,17 @@ const Attributes Alignment = 31<<16; ///< Alignment of parameter (5 bits)
                                      // stored as log2 of alignment with +1 bias
                                      // 0 means unaligned different from align 1
 const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer
+const Attributes NoRedZone = 1<<22; /// disable redzone
+const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
+                                          /// instructions.
 
 /// @brief Attributes that only apply to function parameters.
 const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
 
 /// @brief Attributes that only apply to function.
 const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | 
-  NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq;
+  NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
+  NoRedZone | NoImplicitFloat;
 
 /// @brief Parameter attributes that do not apply to vararg call arguments.
 const Attributes VarArgsIncompatible = StructRet;
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 81a7c60bdb60..bf6b76ee9001 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -89,7 +89,7 @@ public:
   /// emitByte - This callback is invoked when a byte needs to be written to the
   /// output stream.
   ///
-  void emitByte(unsigned char B) {
+  void emitByte(uint8_t B) {
     if (CurBufferPtr != BufferEnd)
       *CurBufferPtr++ = B;
   }
@@ -99,10 +99,10 @@ public:
   ///
   void emitWordLE(unsigned W) {
     if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -113,10 +113,10 @@ public:
   ///
   void emitWordBE(unsigned W) {
     if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -127,14 +127,14 @@ public:
   ///
   void emitDWordLE(uint64_t W) {
     if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 32);
-      *CurBufferPtr++ = (unsigned char)(W >> 40);
-      *CurBufferPtr++ = (unsigned char)(W >> 48);
-      *CurBufferPtr++ = (unsigned char)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -145,14 +145,14 @@ public:
   ///
   void emitDWordBE(uint64_t W) {
     if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >> 56);
-      *CurBufferPtr++ = (unsigned char)(W >> 48);
-      *CurBufferPtr++ = (unsigned char)(W >> 40);
-      *CurBufferPtr++ = (unsigned char)(W >> 32);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -166,8 +166,8 @@ public:
     if(Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
       // Move the current buffer ptr up to the specified alignment.
       CurBufferPtr =
-        (unsigned char*)(((uintptr_t)CurBufferPtr+Alignment-1) &
-                         ~(uintptr_t)(Alignment-1));
+        (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
+                   ~(uintptr_t)(Alignment-1));
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -178,7 +178,7 @@ public:
   /// written to the output stream.
   void emitULEB128Bytes(unsigned Value) {
     do {
-      unsigned char Byte = Value & 0x7f;
+      uint8_t Byte = Value & 0x7f;
       Value >>= 7;
       if (Value) Byte |= 0x80;
       emitByte(Byte);
@@ -187,12 +187,12 @@ public:
   
   /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
   /// written to the output stream.
-  void emitSLEB128Bytes(int Value) {
-    int Sign = Value >> (8 * sizeof(Value) - 1);
+  void emitSLEB128Bytes(int32_t Value) {
+    int32_t Sign = Value >> (8 * sizeof(Value) - 1);
     bool IsMore;
   
     do {
-      unsigned char Byte = Value & 0x7f;
+      uint8_t Byte = Value & 0x7f;
       Value >>= 7;
       IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
       if (IsMore) Byte |= 0x80;
@@ -205,14 +205,14 @@ public:
   void emitString(const std::string &String) {
     for (unsigned i = 0, N = static_cast<unsigned>(String.size());
          i < N; ++i) {
-      unsigned char C = String[i];
+      uint8_t C = String[i];
       emitByte(C);
     }
     emitByte(0);
   }
   
   /// emitInt32 - Emit a int32 directive.
-  void emitInt32(int Value) {
+  void emitInt32(int32_t Value) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *((uint32_t*)CurBufferPtr) = Value;
       CurBufferPtr += 4;
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 226c4c2ad8fb..aaa41a48cb27 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -50,14 +50,14 @@ class MachineCodeEmitter {
 protected:
   /// BufferBegin/BufferEnd - Pointers to the start and end of the memory
   /// allocated for this code buffer.
-  unsigned char *BufferBegin, *BufferEnd;
+  uint8_t *BufferBegin, *BufferEnd;
   
   /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting 
   /// code.  This is guranteed to be in the range [BufferBegin,BufferEnd].  If
   /// this pointer is at BufferEnd, it will never move due to code emission, and
   /// all code emission requests will be ignored (this is the buffer overflow
   /// condition).
-  unsigned char *CurBufferPtr;
+  uint8_t *CurBufferPtr;
 
 public:
   virtual ~MachineCodeEmitter() {}
@@ -96,7 +96,7 @@ public:
   /// emitByte - This callback is invoked when a byte needs to be written to the
   /// output stream.
   ///
-  void emitByte(unsigned char B) {
+  void emitByte(uint8_t B) {
     if (CurBufferPtr != BufferEnd)
       *CurBufferPtr++ = B;
   }
@@ -106,10 +106,10 @@ public:
   ///
   void emitWordLE(unsigned W) {
     if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -120,10 +120,10 @@ public:
   ///
   void emitWordBE(unsigned W) {
     if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -134,14 +134,14 @@ public:
   ///
   void emitDWordLE(uint64_t W) {
     if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 32);
-      *CurBufferPtr++ = (unsigned char)(W >> 40);
-      *CurBufferPtr++ = (unsigned char)(W >> 48);
-      *CurBufferPtr++ = (unsigned char)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -152,14 +152,14 @@ public:
   ///
   void emitDWordBE(uint64_t W) {
     if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (unsigned char)(W >> 56);
-      *CurBufferPtr++ = (unsigned char)(W >> 48);
-      *CurBufferPtr++ = (unsigned char)(W >> 40);
-      *CurBufferPtr++ = (unsigned char)(W >> 32);
-      *CurBufferPtr++ = (unsigned char)(W >> 24);
-      *CurBufferPtr++ = (unsigned char)(W >> 16);
-      *CurBufferPtr++ = (unsigned char)(W >>  8);
-      *CurBufferPtr++ = (unsigned char)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -173,8 +173,8 @@ public:
     if(Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
       // Move the current buffer ptr up to the specified alignment.
       CurBufferPtr =
-        (unsigned char*)(((uintptr_t)CurBufferPtr+Alignment-1) &
-                         ~(uintptr_t)(Alignment-1));
+        (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
+                   ~(uintptr_t)(Alignment-1));
     } else {
       CurBufferPtr = BufferEnd;
     }
@@ -185,7 +185,7 @@ public:
   /// written to the output stream.
   void emitULEB128Bytes(unsigned Value) {
     do {
-      unsigned char Byte = Value & 0x7f;
+      uint8_t Byte = Value & 0x7f;
       Value >>= 7;
       if (Value) Byte |= 0x80;
       emitByte(Byte);
@@ -194,12 +194,12 @@ public:
   
   /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
   /// written to the output stream.
-  void emitSLEB128Bytes(int Value) {
-    int Sign = Value >> (8 * sizeof(Value) - 1);
+  void emitSLEB128Bytes(int32_t Value) {
+    int32_t Sign = Value >> (8 * sizeof(Value) - 1);
     bool IsMore;
   
     do {
-      unsigned char Byte = Value & 0x7f;
+      uint8_t Byte = Value & 0x7f;
       Value >>= 7;
       IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
       if (IsMore) Byte |= 0x80;
@@ -212,14 +212,14 @@ public:
   void emitString(const std::string &String) {
     for (unsigned i = 0, N = static_cast<unsigned>(String.size());
          i < N; ++i) {
-      unsigned char C = String[i];
+      uint8_t C = String[i];
       emitByte(C);
     }
     emitByte(0);
   }
   
   /// emitInt32 - Emit a int32 directive.
-  void emitInt32(int Value) {
+  void emitInt32(int32_t Value) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *((uint32_t*)CurBufferPtr) = Value;
       CurBufferPtr += 4;
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index caabf0779aba..33e2e002e231 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -18,17 +18,17 @@
 #undef ENABLE_CBE_PRINTF_A
 
 /* Define if position independent code is enabled */
-#undef ENABLE_PIC
+#cmakedefine ENABLE_PIC ${ENABLE_PIC}
 
 /* Define if threads enabled */
 #cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
 
 /* Define to 1 if you have `alloca', as a function or macro. */
-#undef HAVE_ALLOCA
+#cmakedefine HAVE_ALLOCA ${HAVE_ALLOCA}
 
 /* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
    */
-#undef HAVE_ALLOCA_H
+#cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H}
 
 /* Define to 1 if you have the `argz_append' function. */
 #undef HAVE_ARGZ_APPEND
@@ -228,7 +228,7 @@
 #cmakedefine HAVE_MALLOC_MALLOC_H ${HAVE_MALLOC_MALLOC_H}
 
 /* Define to 1 if you have the `malloc_zone_statistics' function. */
-#undef HAVE_MALLOC_ZONE_STATISTICS
+#cmakedefine HAVE_MALLOC_ZONE_STATISTICS ${HAVE_MALLOC_ZONE_STATISTICS}
 
 /* Define to 1 if you have the `memcpy' function. */
 #undef HAVE_MEMCPY
@@ -414,7 +414,7 @@
 #cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
 
 /* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
-#undef HAVE_SYS_WAIT_H
+#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H}
 
 /* Define to 1 if the system has the type `uint64_t'. */
 #undef HAVE_UINT64_T
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 9e95a08fc962..ed0fe2740f92 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -704,10 +704,14 @@ public:
   /// specify the full Instruction::OPCODE identifier.
   ///
   static Constant *getNeg(Constant *C);
+  static Constant *getFNeg(Constant *C);
   static Constant *getNot(Constant *C);
   static Constant *getAdd(Constant *C1, Constant *C2);
+  static Constant *getFAdd(Constant *C1, Constant *C2);
   static Constant *getSub(Constant *C1, Constant *C2);
+  static Constant *getFSub(Constant *C1, Constant *C2);
   static Constant *getMul(Constant *C1, Constant *C2);
+  static Constant *getFMul(Constant *C1, Constant *C2);
   static Constant *getUDiv(Constant *C1, Constant *C2);
   static Constant *getSDiv(Constant *C1, Constant *C2);
   static Constant *getFDiv(Constant *C1, Constant *C2);
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 581300e6e370..688a1626d2c3 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -60,7 +60,7 @@ public:
   
   /// getGOTBase - If this is managing a Global Offset Table, this method should
   /// return a pointer to its base.
-  virtual unsigned char *getGOTBase() const = 0;
+  virtual uint8_t *getGOTBase() const = 0;
   
   /// SetDlsymTable - If the JIT must be able to relocate stubs after they have
   /// been emitted, potentially because they are being copied to a process
@@ -89,8 +89,8 @@ public:
   /// emit the function, so it doesn't pass in the size.  Instead, this method
   /// is required to pass back a "valid size".  The JIT will be careful to not
   /// write more than the returned ActualSize bytes of memory. 
-  virtual unsigned char *startFunctionBody(const Function *F, 
-                                           uintptr_t &ActualSize) = 0;
+  virtual uint8_t *startFunctionBody(const Function *F, 
+                                     uintptr_t &ActualSize) = 0;
   
   /// allocateStub - This method is called by the JIT to allocate space for a
   /// function stub (used to handle limited branch displacements) while it is
@@ -100,9 +100,8 @@ public:
   /// thunk for it.  The stub should be "close" to the current function body,
   /// but should not be included in the 'actualsize' returned by
   /// startFunctionBody.
-  virtual unsigned char *allocateStub(const GlobalValue* F, unsigned StubSize,
-                                      unsigned Alignment) =0;
-  
+  virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+                                unsigned Alignment) = 0;
   
   /// endFunctionBody - This method is called when the JIT is done codegen'ing
   /// the specified function.  At this point we know the size of the JIT
@@ -110,11 +109,11 @@ public:
   /// the startFunctionBody method) and FunctionEnd which is a pointer to the 
   /// actual end of the function.  This method should mark the space allocated
   /// and remember where it is in case the client wants to deallocate it.
-  virtual void endFunctionBody(const Function *F, unsigned char *FunctionStart,
-                               unsigned char *FunctionEnd) = 0;
+  virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                               uint8_t *FunctionEnd) = 0;
 
   /// allocateSpace - Allocate a memory block of the given size.
-  virtual unsigned char *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
+  virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
   
   /// deallocateMemForFunction - Free JIT memory for the specified function.
   /// This is never called when the JIT is currently emitting a function.
@@ -122,14 +121,13 @@ public:
   
   /// startExceptionTable - When we finished JITing the function, if exception
   /// handling is set, we emit the exception table.
-  virtual unsigned char* startExceptionTable(const Function* F,
-                                             uintptr_t &ActualSize) = 0;
+  virtual uint8_t* startExceptionTable(const Function* F,
+                                       uintptr_t &ActualSize) = 0;
   
   /// endExceptionTable - This method is called when the JIT is done emitting
   /// the exception table.
-  virtual void endExceptionTable(const Function *F, unsigned char *TableStart,
-                                 unsigned char *TableEnd, 
-                                 unsigned char* FrameRegister) = 0;
+  virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+                                 uint8_t *TableEnd, uint8_t* FrameRegister) = 0;
 };
 
 } // end namespace llvm.
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index 3a774d45e92c..1eab983da68a 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -204,21 +204,30 @@ public:
                                    Instruction *InsertBefore = 0);
   static BinaryOperator *CreateNeg(Value *Op, const std::string &Name,
                                    BasicBlock *InsertAtEnd);
+  static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name = "",
+                                    Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name,
+                                    BasicBlock *InsertAtEnd);
   static BinaryOperator *CreateNot(Value *Op, const std::string &Name = "",
                                    Instruction *InsertBefore = 0);
   static BinaryOperator *CreateNot(Value *Op, const std::string &Name,
                                    BasicBlock *InsertAtEnd);
 
-  /// isNeg, isNot - Check if the given Value is a NEG or NOT instruction.
+  /// isNeg, isFNeg, isNot - Check if the given Value is a
+  /// NEG, FNeg, or NOT instruction.
   ///
   static bool isNeg(const Value *V);
+  static bool isFNeg(const Value *V);
   static bool isNot(const Value *V);
 
   /// getNegArgument, getNotArgument - Helper functions to extract the
-  ///     unary argument of a NEG or NOT operation implemented via Sub or Xor.
+  ///     unary argument of a NEG, FNEG or NOT operation implemented via
+  ///     Sub, FSub, or Xor.
   ///
   static const Value *getNegArgument(const Value *BinOp);
   static       Value *getNegArgument(      Value *BinOp);
+  static const Value *getFNegArgument(const Value *BinOp);
+  static       Value *getFNegArgument(      Value *BinOp);
   static const Value *getNotArgument(const Value *BinOp);
   static       Value *getNotArgument(      Value *BinOp);
 
diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def
index 66bf2ce96694..98fda7770548 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/Instruction.def
@@ -105,71 +105,74 @@ HANDLE_TERM_INST  ( 6, Unreachable, UnreachableInst)
 // Standard binary operators...
  FIRST_BINARY_INST( 7)
 HANDLE_BINARY_INST( 7, Add  , BinaryOperator)
-HANDLE_BINARY_INST( 8, Sub  , BinaryOperator)
-HANDLE_BINARY_INST( 9, Mul  , BinaryOperator)
-HANDLE_BINARY_INST(10, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(11, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(12, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(13, URem , BinaryOperator)
-HANDLE_BINARY_INST(14, SRem , BinaryOperator)
-HANDLE_BINARY_INST(15, FRem , BinaryOperator)
+HANDLE_BINARY_INST( 8, FAdd  , BinaryOperator)
+HANDLE_BINARY_INST( 9, Sub  , BinaryOperator)
+HANDLE_BINARY_INST(10, FSub  , BinaryOperator)
+HANDLE_BINARY_INST(11, Mul  , BinaryOperator)
+HANDLE_BINARY_INST(12, FMul  , BinaryOperator)
+HANDLE_BINARY_INST(13, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(14, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(15, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(16, URem , BinaryOperator)
+HANDLE_BINARY_INST(17, SRem , BinaryOperator)
+HANDLE_BINARY_INST(18, FRem , BinaryOperator)
 
 // Logical operators (integer operands)
-HANDLE_BINARY_INST(16, Shl  , BinaryOperator) // Shift left  (logical)
-HANDLE_BINARY_INST(17, LShr , BinaryOperator) // Shift right (logical) 
-HANDLE_BINARY_INST(18, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(19, And  , BinaryOperator)
-HANDLE_BINARY_INST(20, Or   , BinaryOperator)
-HANDLE_BINARY_INST(21, Xor  , BinaryOperator)
-  LAST_BINARY_INST(21)
+HANDLE_BINARY_INST(19, Shl  , BinaryOperator) // Shift left  (logical)
+HANDLE_BINARY_INST(20, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(21, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(22, And  , BinaryOperator)
+HANDLE_BINARY_INST(23, Or   , BinaryOperator)
+HANDLE_BINARY_INST(24, Xor  , BinaryOperator)
+  LAST_BINARY_INST(24)
 
 // Memory operators...
- FIRST_MEMORY_INST(22)
-HANDLE_MEMORY_INST(22, Malloc, MallocInst)  // Heap management instructions
-HANDLE_MEMORY_INST(23, Free  , FreeInst  )
-HANDLE_MEMORY_INST(24, Alloca, AllocaInst)  // Stack management
-HANDLE_MEMORY_INST(25, Load  , LoadInst  )  // Memory manipulation instrs
-HANDLE_MEMORY_INST(26, Store , StoreInst )
-HANDLE_MEMORY_INST(27, GetElementPtr, GetElementPtrInst)
-  LAST_MEMORY_INST(27)
+ FIRST_MEMORY_INST(25)
+HANDLE_MEMORY_INST(25, Malloc, MallocInst)  // Heap management instructions
+HANDLE_MEMORY_INST(26, Free  , FreeInst  )
+HANDLE_MEMORY_INST(27, Alloca, AllocaInst)  // Stack management
+HANDLE_MEMORY_INST(28, Load  , LoadInst  )  // Memory manipulation instrs
+HANDLE_MEMORY_INST(29, Store , StoreInst )
+HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst)
+  LAST_MEMORY_INST(30)
 
 // Cast operators ...
 // NOTE: The order matters here because CastInst::isEliminableCastPair 
 // NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(28)
-HANDLE_CAST_INST(28, Trunc   , TruncInst   )  // Truncate integers
-HANDLE_CAST_INST(29, ZExt    , ZExtInst    )  // Zero extend integers
-HANDLE_CAST_INST(30, SExt    , SExtInst    )  // Sign extend integers
-HANDLE_CAST_INST(31, FPToUI  , FPToUIInst  )  // floating point -> UInt
-HANDLE_CAST_INST(32, FPToSI  , FPToSIInst  )  // floating point -> SInt
-HANDLE_CAST_INST(33, UIToFP  , UIToFPInst  )  // UInt -> floating point
-HANDLE_CAST_INST(34, SIToFP  , SIToFPInst  )  // SInt -> floating point
-HANDLE_CAST_INST(35, FPTrunc , FPTruncInst )  // Truncate floating point
-HANDLE_CAST_INST(36, FPExt   , FPExtInst   )  // Extend floating point
-HANDLE_CAST_INST(37, PtrToInt, PtrToIntInst)  // Pointer -> Integer
-HANDLE_CAST_INST(38, IntToPtr, IntToPtrInst)  // Integer -> Pointer
-HANDLE_CAST_INST(39, BitCast , BitCastInst )  // Type cast
-  LAST_CAST_INST(39)
+ FIRST_CAST_INST(31)
+HANDLE_CAST_INST(31, Trunc   , TruncInst   )  // Truncate integers
+HANDLE_CAST_INST(32, ZExt    , ZExtInst    )  // Zero extend integers
+HANDLE_CAST_INST(33, SExt    , SExtInst    )  // Sign extend integers
+HANDLE_CAST_INST(34, FPToUI  , FPToUIInst  )  // floating point -> UInt
+HANDLE_CAST_INST(35, FPToSI  , FPToSIInst  )  // floating point -> SInt
+HANDLE_CAST_INST(36, UIToFP  , UIToFPInst  )  // UInt -> floating point
+HANDLE_CAST_INST(37, SIToFP  , SIToFPInst  )  // SInt -> floating point
+HANDLE_CAST_INST(38, FPTrunc , FPTruncInst )  // Truncate floating point
+HANDLE_CAST_INST(39, FPExt   , FPExtInst   )  // Extend floating point
+HANDLE_CAST_INST(40, PtrToInt, PtrToIntInst)  // Pointer -> Integer
+HANDLE_CAST_INST(41, IntToPtr, IntToPtrInst)  // Integer -> Pointer
+HANDLE_CAST_INST(42, BitCast , BitCastInst )  // Type cast
+  LAST_CAST_INST(42)
 
 // Other operators...
- FIRST_OTHER_INST(40)
-HANDLE_OTHER_INST(40, ICmp   , ICmpInst   )  // Integer comparison instruction
-HANDLE_OTHER_INST(41, FCmp   , FCmpInst   )  // Floating point comparison instr.
-HANDLE_OTHER_INST(42, PHI    , PHINode    )  // PHI node instruction
-HANDLE_OTHER_INST(43, Call   , CallInst   )  // Call a function
-HANDLE_OTHER_INST(44, Select , SelectInst )  // select instruction
-HANDLE_OTHER_INST(45, UserOp1, Instruction)  // May be used internally in a pass
-HANDLE_OTHER_INST(46, UserOp2, Instruction)  // Internal to passes only
-HANDLE_OTHER_INST(47, VAArg  , VAArgInst  )  // vaarg instruction
-HANDLE_OTHER_INST(48, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(49, InsertElement, InsertElementInst)  // insert into vector
-HANDLE_OTHER_INST(50, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
-HANDLE_OTHER_INST(51, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(52, InsertValue, InsertValueInst)  // insert into aggregate
-HANDLE_OTHER_INST(53, VICmp  , VICmpInst  )  // Vec Int comparison instruction.
-HANDLE_OTHER_INST(54, VFCmp  , VFCmpInst  )  // Vec FP point comparison instr.
-
-  LAST_OTHER_INST(55)
+ FIRST_OTHER_INST(43)
+HANDLE_OTHER_INST(43, ICmp   , ICmpInst   )  // Integer comparison instruction
+HANDLE_OTHER_INST(44, FCmp   , FCmpInst   )  // Floating point comparison instr.
+HANDLE_OTHER_INST(45, PHI    , PHINode    )  // PHI node instruction
+HANDLE_OTHER_INST(46, Call   , CallInst   )  // Call a function
+HANDLE_OTHER_INST(47, Select , SelectInst )  // select instruction
+HANDLE_OTHER_INST(48, UserOp1, Instruction)  // May be used internally in a pass
+HANDLE_OTHER_INST(49, UserOp2, Instruction)  // Internal to passes only
+HANDLE_OTHER_INST(50, VAArg  , VAArgInst  )  // vaarg instruction
+HANDLE_OTHER_INST(51, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(52, InsertElement, InsertElementInst)  // insert into vector
+HANDLE_OTHER_INST(53, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
+HANDLE_OTHER_INST(54, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(55, InsertValue, InsertValueInst)  // insert into aggregate
+HANDLE_OTHER_INST(56, VICmp  , VICmpInst  )  // Vec Int comparison instruction.
+HANDLE_OTHER_INST(57, VFCmp  , VFCmpInst  )  // Vec FP point comparison instr.
+
+  LAST_OTHER_INST(57)
 
 #undef  FIRST_TERM_INST
 #undef HANDLE_TERM_INST
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index ca8bcae85981..35065a060866 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -32,12 +32,21 @@ public:
   Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getAdd(LHS, RHS);
   }
+  Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
+    return ConstantExpr::getFAdd(LHS, RHS);
+  }
   Constant *CreateSub(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getSub(LHS, RHS);
   }
+  Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
+    return ConstantExpr::getFSub(LHS, RHS);
+  }
   Constant *CreateMul(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getMul(LHS, RHS);
   }
+  Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
+    return ConstantExpr::getFMul(LHS, RHS);
+  }
   Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getUDiv(LHS, RHS);
   }
@@ -87,6 +96,9 @@ public:
   Constant *CreateNeg(Constant *C) const {
     return ConstantExpr::getNeg(C);
   }
+  Constant *CreateFNeg(Constant *C) const {
+    return ConstantExpr::getFNeg(C);
+  }
   Constant *CreateNot(Constant *C) const {
     return ConstantExpr::getNot(C);
   }
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 9ef14af51d9e..7942de7857a2 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -175,18 +175,36 @@ public:
         return Folder.CreateAdd(LC, RC);
     return Insert(BinaryOperator::CreateAdd(LHS, RHS), Name);
   }
+  Value *CreateFAdd(Value *LHS, Value *RHS, const char *Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Folder.CreateFAdd(LC, RC);
+    return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name);
+  }
   Value *CreateSub(Value *LHS, Value *RHS, const char *Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Folder.CreateSub(LC, RC);
     return Insert(BinaryOperator::CreateSub(LHS, RHS), Name);
   }
+  Value *CreateFSub(Value *LHS, Value *RHS, const char *Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Folder.CreateFSub(LC, RC);
+    return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name);
+  }
   Value *CreateMul(Value *LHS, Value *RHS, const char *Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Folder.CreateMul(LC, RC);
     return Insert(BinaryOperator::CreateMul(LHS, RHS), Name);
   }
+  Value *CreateFMul(Value *LHS, Value *RHS, const char *Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Folder.CreateMul(LC, RC);
+    return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
+  }
   Value *CreateUDiv(Value *LHS, Value *RHS, const char *Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 1dce497017bf..a49cf8424067 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -39,12 +39,21 @@ public:
   Value *CreateAdd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateAdd(LHS, RHS);
   }
+  Value *CreateFAdd(Constant *LHS, Constant *RHS) const {
+    return BinaryOperator::CreateFAdd(LHS, RHS);
+  }
   Value *CreateSub(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateSub(LHS, RHS);
   }
+  Value *CreateFSub(Constant *LHS, Constant *RHS) const {
+    return BinaryOperator::CreateFSub(LHS, RHS);
+  }
   Value *CreateMul(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateMul(LHS, RHS);
   }
+  Value *CreateFMul(Constant *LHS, Constant *RHS) const {
+    return BinaryOperator::CreateFMul(LHS, RHS);
+  }
   Value *CreateUDiv(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateUDiv(LHS, RHS);
   }
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index d27a7f1ed786..fda925f5a9a8 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -157,18 +157,36 @@ inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
 }
 
 template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
+                                                          const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
+}
+
+template<typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
                                                         const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
 }
 
 template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
+                                                          const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
+}
+
+template<typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
                                                         const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
 }
 
 template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
+                                                          const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
+}
+
+template<typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
                                                         const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
@@ -494,6 +512,35 @@ template<typename LHS>
 inline neg_match<LHS> m_Neg(const LHS &L) { return L; }
 
 
+template<typename LHS_t>
+struct fneg_match {
+  LHS_t L;
+
+  fneg_match(const LHS_t &LHS) : L(LHS) {}
+
+  template<typename OpTy>
+  bool match(OpTy *V) {
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      if (I->getOpcode() == Instruction::FSub)
+        return matchIfFNeg(I->getOperand(0), I->getOperand(1));
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      if (CE->getOpcode() == Instruction::FSub)
+        return matchIfFNeg(CE->getOperand(0), CE->getOperand(1));
+    if (ConstantFP *CF = dyn_cast<ConstantFP>(V))
+      return L.match(ConstantExpr::getFNeg(CF));
+    return false;
+  }
+private:
+  bool matchIfFNeg(Value *LHS, Value *RHS) {
+    return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) &&
+           L.match(RHS);
+  }
+};
+
+template<typename LHS>
+inline fneg_match<LHS> m_FNeg(const LHS &L) { return L; }
+
+
 //===----------------------------------------------------------------------===//
 // Matchers for control flow
 //
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index 024c0194d4e0..5c63034a863c 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -60,15 +60,10 @@ namespace llvm {
   ///
   /// Internalize - Run the internalize pass.
   /// RunInliner - Use a function inlining pass.
-  /// RunSecondGlobalOpt - Run the global optimizer pass twice.
   /// VerifyEach - Run the verifier after each pass.
-  //
-  // FIXME: RunSecondGlobalOpt should go away once we resolve which of LTO or
-  // llvm-ld is better.
   static inline void createStandardLTOPasses(PassManager *PM,
                                              bool Internalize,
                                              bool RunInliner,
-                                             bool RunSecondGlobalOpt,
                                              bool VerifyEach);
 
   // Implementations
@@ -173,7 +168,6 @@ namespace llvm {
   static inline void createStandardLTOPasses(PassManager *PM,
                                              bool Internalize,
                                              bool RunInliner,
-                                             bool RunSecondGlobalOpt,
                                              bool VerifyEach) {
     // Now that composite has been compiled, scan through the module, looking
     // for a main function.  If main is defined, mark all other functions
@@ -207,8 +201,8 @@ namespace llvm {
       addOnePass(PM, createFunctionInliningPass(), VerifyEach);
 
     addOnePass(PM, createPruneEHPass(), VerifyEach);   // Remove dead EH info.
-    // Optimize globals again.
-    if (RunSecondGlobalOpt)
+    // Optimize globals again if we ran the inliner.
+    if (RunInliner)
       addOnePass(PM, createGlobalOptimizerPass(), VerifyEach);
     addOnePass(PM, createGlobalDCEPass(), VerifyEach); // Remove dead functions.
 
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 172e4fe123d9..b0700c1dadd8 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -48,12 +48,21 @@ public:
   Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getAdd(LHS, RHS));
   }
+  Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFAdd(LHS, RHS));
+  }
   Constant *CreateSub(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getSub(LHS, RHS));
   }
+  Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFSub(LHS, RHS));
+  }
   Constant *CreateMul(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getMul(LHS, RHS));
   }
+  Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFMul(LHS, RHS));
+  }
   Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getUDiv(LHS, RHS));
   }
@@ -103,6 +112,9 @@ public:
   Constant *CreateNeg(Constant *C) const {
     return Fold(ConstantExpr::getNeg(C));
   }
+  Constant *CreateFNeg(Constant *C) const {
+    return Fold(ConstantExpr::getFNeg(C));
+  }
   Constant *CreateNot(Constant *C) const {
     return Fold(ConstantExpr::getNot(C));
   }
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index b67d12675fef..8242f04e23ce 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -45,6 +45,19 @@ private:
   bool Unbuffered;
 
 public:
+  // color order matches ANSI escape sequence, don't change
+  enum Colors {
+    BLACK=0,
+    RED,
+    GREEN,
+    YELLOW,
+    BLUE,
+    MAGENTA,
+    CYAN,
+    WHITE,
+    SAVEDCOLOR
+  };
+
   explicit raw_ostream(bool unbuffered=false) : Unbuffered(unbuffered) {
     // Start out ready to flush.
     OutBufStart = OutBufEnd = OutBufCur = 0;
@@ -167,6 +180,20 @@ public:
   // Formatted output, see the format() function in Support/Format.h.
   raw_ostream &operator<<(const format_object_base &Fmt);
 
+  /// Changes the foreground color of text that will be output from this point
+  /// forward.
+  /// @param colors ANSI color to use, the special SAVEDCOLOR can be used to
+  /// change only the bold attribute, and keep colors untouched
+  /// @param bold bold/brighter text, default false
+  /// @param bg if true change the background, default: change foreground
+  /// @returns itself so it can be used within << invocations
+  virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
+                                   bool  bg=false) { return *this; }
+
+  /// Resets the colors to terminal defaults. Call this when you are done
+  /// outputting colored text, or before program exit.
+  virtual raw_ostream &resetColor() { return *this; }
+
   //===--------------------------------------------------------------------===//
   // Subclass Interface
   //===--------------------------------------------------------------------===//
@@ -243,6 +270,10 @@ public:
   /// seek - Flushes the stream and repositions the underlying file descriptor
   ///  positition to the offset specified from the beginning of the file.
   uint64_t seek(uint64_t off);
+
+  virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
+                                   bool bg=false);
+  virtual raw_ostream &resetColor();
 };
 
 /// raw_stdout_ostream - This is a stream that always prints to stdout.
diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h
index ce19eb298190..11dbf759a6c4 100644
--- a/include/llvm/System/Process.h
+++ b/include/llvm/System/Process.h
@@ -107,6 +107,34 @@ namespace sys {
       /// console, or if the number of columns cannot be determined,
       /// this routine returns zero.
       static unsigned StandardErrColumns();
+
+      /// This function determines whether the terminal connected to standard
+      /// output supports colors. If standard output is not connected to a
+      /// terminal, this function returns false.
+      static bool StandardOutHasColors();
+
+      /// This function determines whether the terminal connected to standard
+      /// error supports colors. If standard error is not connected to a
+      /// terminal, this function returns false.
+      static bool StandardErrHasColors();
+
+      /// Whether changing colors requires the output to be flushed.
+      /// This is needed on systems that don't support escape sequences for
+      /// changing colors.
+      static bool ColorNeedsFlush();
+
+      /// This function returns the colorcode escape sequences.
+      /// If ColorNeedsFlush() is true then this function will change the colors
+      /// and return an empty escape sequence. In that case it is the
+      /// responsibility of the client to flush the output stream prior to
+      /// calling this function.
+      static const char *OutputColor(char c, bool bold, bool bg);
+
+      /// Same as OutputColor, but only enables the bold attribute.
+      static const char *OutputBold(bool bg);
+
+      /// Resets the terminals colors, or returns an escape sequence to do so.
+      static const char *ResetColor();
     /// @}
   };
 }
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index 548cc077a946..e266a71f8c85 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -25,9 +25,23 @@ namespace llvm {
     // e_machine member of the ELF header.
     unsigned short EMachine;
   public:
+
+    // Machine architectures
     enum MachineType {
-      NoMachine,
-      EM_386 = 3
+      EM_NONE = 0,     // No machine
+      EM_M32 = 1,      // AT&T WE 32100
+      EM_SPARC = 2,    // SPARC
+      EM_386 = 3,      // Intel 386
+      EM_68K = 4,      // Motorola 68000
+      EM_88K = 5,      // Motorola 88000
+      EM_486 = 6,      // Intel 486 (deprecated)
+      EM_860 = 7,      // Intel 80860
+      EM_MIPS = 8,     // MIPS R3000
+      EM_PPC = 20,     // PowerPC
+      EM_ARM = 40,     // ARM
+      EM_ALPHA = 41,   // DEC Alpha
+      EM_SPARCV9 = 43, // SPARC V9
+      EM_X86_64 = 62   // AMD64
     };
 
     explicit TargetELFWriterInfo(MachineType machine) : EMachine(machine) {}
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 163f4c5ae50e..327af275114c 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -632,7 +632,8 @@ public:
   /// It returns MVT::iAny if SelectionDAG should be responsible for
   /// determining it.
   virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                  bool isSrcConst, bool isSrcStr) const {
+                                  bool isSrcConst, bool isSrcStr,
+                                  SelectionDAG &DAG) const {
     return MVT::iAny;
   }
   
@@ -825,11 +826,11 @@ public:
   virtual bool
   isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
 
-  /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-  /// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-  /// location that the 'Base' load is loading from.
-  bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
-                         const MachineFrameInfo *MFI) const;
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+  /// location that is 'Dist' units away from the location that the 'Base' load 
+  /// is loading from.
+  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
+                         int Dist, const MachineFrameInfo *MFI) const;
 
   /// PerformDAGCombine - This method will be invoked for all target nodes and
   /// for any target-independent nodes that the target has registered with
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 06d7d79441ec..0c74fa1f2c1e 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -73,12 +73,6 @@ namespace llvm {
   /// target FP instructions.
   extern bool UseSoftFloat;
 
-  /// NoImplicitFloat - This flag is enabled when the -no-implicit-float flag is
-  /// specified on the command line.  When this flag is on, the code generator
-  /// won't generate any implicit floating point instructions. I.e., no XMM or
-  /// x87 or vectorized memcpy/memmove instructions. This is for X86 only.
-  extern bool NoImplicitFloat;
-
   /// NoZerosInBSS - By default some codegens place zero-initialized data to
   /// .bss section. This flag disables such behaviour (necessary, e.g. for
   /// crt*.o compiling).
@@ -117,10 +111,6 @@ namespace llvm {
   /// wth earlier copy coalescing.
   extern bool StrongPHIElim;
 
-  /// DisableRedZone - This flag disables use of the "Red Zone" on
-  /// targets which would otherwise have one.
-  extern bool DisableRedZone;
-
 } // End llvm namespace
 
 #endif
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 2dea7b3ef687..2b34ad3b070d 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/Streams.h"
 #include "llvm/ADT/Statistic.h"
-#include <ostream>
 using namespace llvm;
 
 STATISTIC(TotalInsts , "Number of instructions (of all types)");
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index de6480a66d5b..a0d3974189a2 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <algorithm>
-#include <ostream>
 using namespace llvm;
 
 char LoopInfo::ID = 0;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f7f1849b6da8..03c5005989e7 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -80,7 +80,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
-#include <ostream>
 #include <algorithm>
 using namespace llvm;
 
@@ -2463,24 +2462,15 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
 ScalarEvolution::BackedgeTakenInfo
 ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   // If the loop has a non-one exit block count, we can't analyze it.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1) return UnknownValue;
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock)
+    return UnknownValue;
 
   // Okay, there is one exit block.  Try to find the condition that causes the
   // loop to be exited.
-  BasicBlock *ExitBlock = ExitBlocks[0];
-
-  BasicBlock *ExitingBlock = 0;
-  for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
-       PI != E; ++PI)
-    if (L->contains(*PI)) {
-      if (ExitingBlock == 0)
-        ExitingBlock = *PI;
-      else
-        return UnknownValue;   // More than one block exiting!
-    }
-  assert(ExitingBlock && "No exits from loop, something is broken!");
+  BasicBlock *ExitingBlock = L->getExitingBlock();
+  if (!ExitingBlock)
+    return UnknownValue;   // More than one block exiting!
 
   // Okay, we've computed the exiting block.  See what condition causes us to
   // exit.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7ba8268b508a..ef77e46fc99a 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -644,3 +644,16 @@ Value *SCEVExpander::expand(const SCEV *S) {
   InsertedExpressions[S] = V;
   return V;
 }
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none).  A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+Value *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+                                                    const Type *Ty) {
+  assert(Ty->isInteger() && "Can only insert integer induction variables!");
+  SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
+                                  SE.getIntegerSCEV(1, Ty), L);
+  return expand(H);
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 29ff8aa4f4d0..45f97b8f64b1 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -771,7 +771,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   if (I == 0) return false;
   
   // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
-  if (I->getOpcode() == Instruction::Add &&
+  if (I->getOpcode() == Instruction::FAdd &&
       isa<ConstantFP>(I->getOperand(1)) && 
       cast<ConstantFP>(I->getOperand(1))->isNullValue())
     return true;
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f2e689017603..c5190efc4dac 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -547,6 +547,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(optsize);
   KEYWORD(ssp);
   KEYWORD(sspreq);
+  KEYWORD(noredzone);
+  KEYWORD(noimplicitfloat);
 
   KEYWORD(type);
   KEYWORD(opaque);
@@ -590,7 +592,9 @@ lltok::Kind LLLexer::LexIdentifier() {
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
     UIntVal = Instruction::Enum; return lltok::kw_##STR; }
 
-  INSTKEYWORD(add,   Add);  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(mul,   Mul);
+  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
+  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
+  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
   INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
   INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
   INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 8db4c715b793..5c4450244c18 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -712,25 +712,26 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
         return Error(AttrLoc, "invalid use of parameter-only attribute");
         
       return false;
-    case lltok::kw_zeroext:      Attrs |= Attribute::ZExt; break;
-    case lltok::kw_signext:      Attrs |= Attribute::SExt; break;
-    case lltok::kw_inreg:        Attrs |= Attribute::InReg; break;
-    case lltok::kw_sret:         Attrs |= Attribute::StructRet; break;
-    case lltok::kw_noalias:      Attrs |= Attribute::NoAlias; break;
-    case lltok::kw_nocapture:    Attrs |= Attribute::NoCapture; break;
-    case lltok::kw_byval:        Attrs |= Attribute::ByVal; break;
-    case lltok::kw_nest:         Attrs |= Attribute::Nest; break;
-
-    case lltok::kw_noreturn:     Attrs |= Attribute::NoReturn; break;
-    case lltok::kw_nounwind:     Attrs |= Attribute::NoUnwind; break;
-    case lltok::kw_noinline:     Attrs |= Attribute::NoInline; break;
-    case lltok::kw_readnone:     Attrs |= Attribute::ReadNone; break;
-    case lltok::kw_readonly:     Attrs |= Attribute::ReadOnly; break;
-    case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
-    case lltok::kw_optsize:      Attrs |= Attribute::OptimizeForSize; break;
-    case lltok::kw_ssp:          Attrs |= Attribute::StackProtect; break;
-    case lltok::kw_sspreq:       Attrs |= Attribute::StackProtectReq; break;
-
+    case lltok::kw_zeroext:         Attrs |= Attribute::ZExt; break;
+    case lltok::kw_signext:         Attrs |= Attribute::SExt; break;
+    case lltok::kw_inreg:           Attrs |= Attribute::InReg; break;
+    case lltok::kw_sret:            Attrs |= Attribute::StructRet; break;
+    case lltok::kw_noalias:         Attrs |= Attribute::NoAlias; break;
+    case lltok::kw_nocapture:       Attrs |= Attribute::NoCapture; break;
+    case lltok::kw_byval:           Attrs |= Attribute::ByVal; break;
+    case lltok::kw_nest:            Attrs |= Attribute::Nest; break;
+
+    case lltok::kw_noreturn:        Attrs |= Attribute::NoReturn; break;
+    case lltok::kw_nounwind:        Attrs |= Attribute::NoUnwind; break;
+    case lltok::kw_noinline:        Attrs |= Attribute::NoInline; break;
+    case lltok::kw_readnone:        Attrs |= Attribute::ReadNone; break;
+    case lltok::kw_readonly:        Attrs |= Attribute::ReadOnly; break;
+    case lltok::kw_alwaysinline:    Attrs |= Attribute::AlwaysInline; break;
+    case lltok::kw_optsize:         Attrs |= Attribute::OptimizeForSize; break;
+    case lltok::kw_ssp:             Attrs |= Attribute::StackProtect; break;
+    case lltok::kw_sspreq:          Attrs |= Attribute::StackProtectReq; break;
+    case lltok::kw_noredzone:       Attrs |= Attribute::NoRedZone; break;
+    case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
         
     case lltok::kw_align: {
       unsigned Alignment;
@@ -1835,8 +1836,11 @@ bool LLParser::ParseValID(ValID &ID) {
       
   // Binary Operators.
   case lltok::kw_add:
+  case lltok::kw_fadd:
   case lltok::kw_sub:
+  case lltok::kw_fsub:
   case lltok::kw_mul:
+  case lltok::kw_fmul:
   case lltok::kw_udiv:
   case lltok::kw_sdiv:
   case lltok::kw_fdiv:
@@ -2400,8 +2404,13 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
-  case lltok::kw_mul:    return ParseArithmetic(Inst, PFS, KeywordVal, 0);
-      
+  case lltok::kw_mul:
+    // API compatibility: Accept either integer or floating-point types.
+    return ParseArithmetic(Inst, PFS, KeywordVal, 0);
+  case lltok::kw_fadd:
+  case lltok::kw_fsub:
+  case lltok::kw_fmul:    return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+
   case lltok::kw_udiv:
   case lltok::kw_sdiv:
   case lltok::kw_urem:
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index d8bd38a4a61d..9335d19612a5 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -80,6 +80,8 @@ namespace lltok {
     kw_optsize,
     kw_ssp,
     kw_sspreq,
+    kw_noredzone,
+    kw_noimplicitfloat,
 
     kw_type,
     kw_opaque,
@@ -89,7 +91,8 @@ namespace lltok {
     kw_ueq, kw_une,
 
     // Instruction Opcodes (Opcode in UIntVal).
-    kw_add,  kw_sub,  kw_mul,  kw_udiv, kw_sdiv, kw_fdiv,
+    kw_add,  kw_fadd, kw_sub,  kw_fsub, kw_mul,  kw_fmul,
+    kw_udiv, kw_sdiv, kw_fdiv,
     kw_urem, kw_srem, kw_frem, kw_shl,  kw_lshr, kw_ashr,
     kw_and,  kw_or,   kw_xor,  kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp,
 
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 1dad04bd8f6f..3b44f56421f4 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -104,9 +104,12 @@ static int GetDecodedCastOpcode(unsigned Val) {
 static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
   switch (Val) {
   default: return -1;
-  case bitc::BINOP_ADD:  return Instruction::Add;
-  case bitc::BINOP_SUB:  return Instruction::Sub;
-  case bitc::BINOP_MUL:  return Instruction::Mul;
+  case bitc::BINOP_ADD:
+    return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add;
+  case bitc::BINOP_SUB:
+    return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub;
+  case bitc::BINOP_MUL:
+    return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul;
   case bitc::BINOP_UDIV: return Instruction::UDiv;
   case bitc::BINOP_SDIV:
     return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index bfc029c1f277..9f16728d4924 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -77,9 +77,12 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) {
 static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
   switch (Opcode) {
   default: assert(0 && "Unknown binary instruction!");
-  case Instruction::Add:  return bitc::BINOP_ADD;
-  case Instruction::Sub:  return bitc::BINOP_SUB;
-  case Instruction::Mul:  return bitc::BINOP_MUL;
+  case Instruction::Add:
+  case Instruction::FAdd: return bitc::BINOP_ADD;
+  case Instruction::Sub:
+  case Instruction::FSub: return bitc::BINOP_SUB;
+  case Instruction::Mul:
+  case Instruction::FMul: return bitc::BINOP_MUL;
   case Instruction::UDiv: return bitc::BINOP_UDIV;
   case Instruction::FDiv:
   case Instruction::SDiv: return bitc::BINOP_SDIV;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 25217b088099..5a66f4b04b93 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -20,7 +20,6 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
-#include <ostream>
 using namespace llvm;
 
 static TimerGroup &getDwarfTimerGroup() {
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index 45e7dd305807..f7ca4f4c9045 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include <ostream>
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
new file mode 100644
index 000000000000..bf43622a6faa
--- /dev/null
+++ b/lib/CodeGen/ELF.h
@@ -0,0 +1,186 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on
+// the Tool Interface Standard (TIS) Executable and Linking Format
+// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not
+// standardized, as far as I can tell. It was largely based on information
+// I found in OpenBSD header files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
+
+namespace llvm {
+  class GlobalVariable;
+
+  // Identification Indexes
+  enum {
+    EI_MAG0 = 0,
+    EI_MAG1 = 1,
+    EI_MAG2 = 2,
+    EI_MAG3 = 3
+  };
+
+  // File types
+  enum {
+    ET_NONE   = 0,      // No file type
+    ET_REL    = 1,      // Relocatable file
+    ET_EXEC   = 2,      // Executable file
+    ET_DYN    = 3,      // Shared object file
+    ET_CORE   = 4,      // Core file
+    ET_LOPROC = 0xff00, // Beginning of processor-specific codes
+    ET_HIPROC = 0xffff  // Processor-specific
+  };
+
+  // Object file classes.
+  enum {
+    ELFCLASS32 = 1, // 32-bit object file
+    ELFCLASS64 = 2  // 64-bit object file
+  };
+
+  // Object file byte orderings.
+  enum {
+    ELFDATA2LSB = 1, // Little-endian object file
+    ELFDATA2MSB = 2  // Big-endian object file
+  };
+
+  // Versioning
+  enum {
+    EV_NONE = 0,
+    EV_CURRENT = 1
+  };
+
+  /// ELFSection - This struct contains information about each section that is
+  /// emitted to the file.  This is eventually turned into the section header
+  /// table at the end of the file.
+  struct ELFSection {
+
+    // ELF specific fields
+    std::string Name;       // Name of the section.
+    unsigned NameIdx;       // Index in .shstrtab of name, once emitted.
+    unsigned Type;
+    unsigned Flags;
+    uint64_t Addr;
+    unsigned Offset;
+    unsigned Size;
+    unsigned Link;
+    unsigned Info;
+    unsigned Align;
+    unsigned EntSize;
+
+    // Section Header Flags
+    enum {
+      SHF_WRITE            = 1 << 0, // Writable
+      SHF_ALLOC            = 1 << 1, // Mapped into the process addr space
+      SHF_EXECINSTR        = 1 << 2, // Executable
+      SHF_MERGE            = 1 << 4, // Might be merged if equal
+      SHF_STRINGS          = 1 << 5, // Contains null-terminated strings
+      SHF_INFO_LINK        = 1 << 6, // 'sh_info' contains SHT index
+      SHF_LINK_ORDER       = 1 << 7, // Preserve order after combining
+      SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+      SHF_GROUP            = 1 << 9, // Section is a member of a group
+      SHF_TLS              = 1 << 10 // Section holds thread-local data
+    };
+
+    // Section Types
+    enum {
+      SHT_NULL     = 0,  // No associated section (inactive entry).
+      SHT_PROGBITS = 1,  // Program-defined contents.
+      SHT_SYMTAB   = 2,  // Symbol table.
+      SHT_STRTAB   = 3,  // String table.
+      SHT_RELA     = 4,  // Relocation entries; explicit addends.
+      SHT_HASH     = 5,  // Symbol hash table.
+      SHT_DYNAMIC  = 6,  // Information for dynamic linking.
+      SHT_NOTE     = 7,  // Information about the file.
+      SHT_NOBITS   = 8,  // Data occupies no space in the file.
+      SHT_REL      = 9,  // Relocation entries; no explicit addends.
+      SHT_SHLIB    = 10, // Reserved.
+      SHT_DYNSYM   = 11, // Symbol table.
+      SHT_LOPROC   = 0x70000000, // Lowest processor architecture-specific type.
+      SHT_HIPROC   = 0x7fffffff, // Highest processor architecture-specific type.
+      SHT_LOUSER   = 0x80000000, // Lowest type reserved for applications.
+      SHT_HIUSER   = 0xffffffff  // Highest type reserved for applications.
+    };
+
+    // Special section indices.
+    enum {
+      SHN_UNDEF     = 0,      // Undefined, missing, irrelevant, or meaningless
+      SHN_LORESERVE = 0xff00, // Lowest reserved index
+      SHN_LOPROC    = 0xff00, // Lowest processor-specific index
+      SHN_HIPROC    = 0xff1f, // Highest processor-specific index
+      SHN_ABS       = 0xfff1, // Symbol has absolute value; does not need relocation
+      SHN_COMMON    = 0xfff2, // FORTRAN COMMON or C external global variables
+      SHN_HIRESERVE = 0xffff  // Highest reserved index
+    };
+
+    /// SectionIdx - The number of the section in the Section Table.
+    unsigned short SectionIdx;
+
+    /// SectionData - The actual data for this section which we are building
+    /// up for emission to the file.
+    std::vector<unsigned char> SectionData;
+
+    ELFSection(const std::string &name)
+      : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+        Link(0), Info(0), Align(0), EntSize(0) {}
+  };
+
+  /// ELFSym - This struct contains information about each symbol that is
+  /// added to logical symbol table for the module.  This is eventually
+  /// turned into a real symbol table in the file.
+  struct ELFSym {
+    const GlobalValue *GV;    // The global value this corresponds to.
+
+    // ELF specific fields
+    unsigned NameIdx;         // Index in .strtab of name, once emitted.
+    uint64_t Value;
+    unsigned Size;
+    uint8_t Info;
+    uint8_t Other;
+    unsigned short SectionIdx;
+
+    enum { 
+      STB_LOCAL = 0,
+      STB_GLOBAL = 1,
+      STB_WEAK = 2 
+    };
+
+    enum { 
+      STT_NOTYPE = 0,
+      STT_OBJECT = 1,
+      STT_FUNC = 2,
+      STT_SECTION = 3,
+      STT_FILE = 4 
+    };
+
+    ELFSym(const GlobalValue *gv) : GV(gv), Value(0),
+                                    Size(0), Info(0), Other(0),
+                                    SectionIdx(ELFSection::SHN_UNDEF) {}
+
+    void SetBind(unsigned X) {
+      assert(X == (X & 0xF) && "Bind value out of range!");
+      Info = (Info & 0x0F) | (X << 4);
+    }
+    void SetType(unsigned X) {
+      assert(X == (X & 0xF) && "Type value out of range!");
+      Info = (Info & 0xF0) | X;
+    }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 0a0245f3dd59..9af276b4f932 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "elfce"
+
 #include "ELFCodeEmitter.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Mangler.h"
-#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Debug.h"
 
 //===----------------------------------------------------------------------===//
 //                       ELFCodeEmitter Implementation
@@ -27,67 +27,87 @@ namespace llvm {
 
 /// startFunction - This callback is invoked when a new machine function is
 /// about to be emitted.
-void ELFCodeEmitter::startFunction(MachineFunction &F) {
-  // Align the output buffer to the appropriate alignment.
-  unsigned Align = 16;   // FIXME: GENERICIZE!!
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer to the appropriate alignment, power of 2.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned Align = std::max(FnAlign, TDAlign);
+  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
   // Get the ELF Section that this function belongs in.
-  ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
-                      ELFWriter::ELFSection::SHF_EXECINSTR |
-                      ELFWriter::ELFSection::SHF_ALLOC);
-  OutBuffer = &ES->SectionData;
-  cerr << "FIXME: This code needs to be updated for changes in the "
-       << "CodeEmitter interfaces.  In particular, this should set "
-       << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
-  abort();
+  ES = &EW.getTextSection();
+
+  // FIXME: better memory management, this will be replaced by BinaryObjects
+  ES->SectionData.reserve(4096);
+  BufferBegin = &ES->SectionData[0];
+  BufferEnd = BufferBegin + ES->SectionData.capacity();
 
   // Upgrade the section alignment if required.
   if (ES->Align < Align) ES->Align = Align;
 
-  // Add padding zeros to the end of the buffer to make sure that the
-  // function will start on the correct byte alignment within the section.
-  OutputBuffer OB(*OutBuffer,
-                  TM.getTargetData()->getPointerSizeInBits() == 64,
-                  TM.getTargetData()->isLittleEndian());
-  OB.align(Align);
-  FnStart = OutBuffer->size();
+  // Round the size up to the correct alignment for starting the new function.
+  ES->Size = (ES->Size + (Align-1)) & (-Align);
+
+  // Snaity check on allocated space for text section
+  assert( ES->Size < 4096 && "no more space in TextSection" );
+
+  // FIXME: Using ES->Size directly here instead of calculating it from the
+  // output buffer size (impossible because the code emitter deals only in raw
+  // bytes) forces us to manually synchronize size and write padding zero bytes
+  // to the output buffer for all non-text sections.  For text sections, we do
+  // not synchonize the output buffer, and we just blow up if anyone tries to
+  // write non-code to it.  An assert should probably be added to
+  // AddSymbolToSection to prevent calling it on the text section.
+  CurBufferPtr = BufferBegin + ES->Size;
+
+  // Record function start address relative to BufferBegin
+  FnStartPtr = CurBufferPtr;
 }
 
 /// finishFunction - This callback is invoked after the function is completely
 /// finished.
-bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
-  // We now know the size of the function, add a symbol to represent it.
-  ELFWriter::ELFSym FnSym(F.getFunction());
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+  // Add a symbol to represent the function.
+  ELFSym FnSym(MF.getFunction());
 
   // Figure out the binding (linkage) of the symbol.
-  switch (F.getFunction()->getLinkage()) {
+  switch (MF.getFunction()->getLinkage()) {
   default:
     // appending linkage is illegal for functions.
     assert(0 && "Unknown linkage type!");
   case GlobalValue::ExternalLinkage:
-    FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+    FnSym.SetBind(ELFSym::STB_GLOBAL);
     break;
   case GlobalValue::LinkOnceAnyLinkage:
   case GlobalValue::LinkOnceODRLinkage:
   case GlobalValue::WeakAnyLinkage:
   case GlobalValue::WeakODRLinkage:
-    FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+    FnSym.SetBind(ELFSym::STB_WEAK);
     break;
   case GlobalValue::PrivateLinkage:
     assert (0 && "PrivateLinkage should not be in the symbol table.");
   case GlobalValue::InternalLinkage:
-    FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+    FnSym.SetBind(ELFSym::STB_LOCAL);
     break;
   }
 
-  ES->Size = OutBuffer->size();
+  // Set the symbol type as a function
+  FnSym.SetType(ELFSym::STT_FUNC);
 
-  FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
   FnSym.SectionIdx = ES->SectionIdx;
-  FnSym.Value = FnStart;   // Value = Offset from start of Section.
-  FnSym.Size = OutBuffer->size()-FnStart;
+  FnSym.Size = CurBufferPtr-FnStartPtr;
+
+  // Offset from start of Section
+  FnSym.Value = FnStartPtr-BufferBegin;
 
   // Finally, add it to the symtab.
   EW.SymbolTable.push_back(FnSym);
+
+  // Update Section Size
+  ES->Size = CurBufferPtr - BufferBegin;
   return false;
 }
 
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index 11ebcc871da1..e9ee936a4865 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -21,11 +21,10 @@ namespace llvm {
   class ELFCodeEmitter : public MachineCodeEmitter {
     ELFWriter &EW;
     TargetMachine &TM;
-    ELFWriter::ELFSection *ES;  // Section to write to.
-    std::vector<unsigned char> *OutBuffer;
-    size_t FnStart;
+    ELFSection *ES;  // Section to write to.
+    uint8_t *FnStartPtr;
   public:
-    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
 
     void startFunction(MachineFunction &F);
     bool finishFunction(MachineFunction &F);
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index be8edce08d74..24f12a38d41c 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -33,6 +33,7 @@
 
 #include "ELFWriter.h"
 #include "ELFCodeEmitter.h"
+#include "ELF.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/DerivedTypes.h"
@@ -67,7 +68,8 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
 
 ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
   : MachineFunctionPass(&ID), O(o), TM(tm) {
-  e_flags = 0;    // e_flags defaults to 0, no flags.
+  e_flags = 0;  // e_flags defaults to 0, no flags.
+  e_machine = TM.getELFWriterInfo()->getEMachine();
 
   is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
   isLittleEndian = TM.getTargetData()->isLittleEndian();
@@ -90,24 +92,39 @@ bool ELFWriter::doInitialization(Module &M) {
   std::vector<unsigned char> &FH = FileHeader;
   OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
 
-  FHOut.outbyte(0x7F);                     // EI_MAG0
-  FHOut.outbyte('E');                      // EI_MAG1
-  FHOut.outbyte('L');                      // EI_MAG2
-  FHOut.outbyte('F');                      // EI_MAG3
-  FHOut.outbyte(is64Bit ? 2 : 1);          // EI_CLASS
-  FHOut.outbyte(isLittleEndian ? 1 : 2);   // EI_DATA
-  FHOut.outbyte(1);                        // EI_VERSION
-  FH.resize(16);                         // EI_PAD up to 16 bytes.
-
-  // This should change for shared objects.
-  FHOut.outhalf(1);                 // e_type = ET_REL
-  FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined
-  FHOut.outword(1);                 // e_version = 1
-  FHOut.outaddr(0);                 // e_entry = 0 -> no entry point in .o file
-  FHOut.outaddr(0);                 // e_phoff = 0 -> no program header for .o
-
-  ELFHeader_e_shoff_Offset = FH.size();
-  FHOut.outaddr(0);                 // e_shoff
+  unsigned ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32;
+  unsigned ElfEndian = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
+
+  // ELF Header
+  // ----------
+  // Fields e_shnum e_shstrndx are only known after all section have
+  // been emitted. They locations in the ouput buffer are recorded so
+  // to be patched up later.
+  //
+  // Note
+  // ----
+  // FHOut.outaddr method behaves differently for ELF32 and ELF64 writing
+  // 4 bytes in the former and 8 in the last for *_off and *_addr elf types
+
+  FHOut.outbyte(0x7f); // e_ident[EI_MAG0]
+  FHOut.outbyte('E');  // e_ident[EI_MAG1]
+  FHOut.outbyte('L');  // e_ident[EI_MAG2]
+  FHOut.outbyte('F');  // e_ident[EI_MAG3]
+
+  FHOut.outbyte(ElfClass);   // e_ident[EI_CLASS]
+  FHOut.outbyte(ElfEndian);  // e_ident[EI_DATA]
+  FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION]
+
+  FH.resize(16);  // e_ident[EI_NIDENT-EI_PAD]
+
+  FHOut.outhalf(ET_REL);     // e_type
+  FHOut.outhalf(e_machine);  // e_machine = target
+  FHOut.outword(EV_CURRENT); // e_version
+  FHOut.outaddr(0);          // e_entry = 0 -> no entry point in .o file
+  FHOut.outaddr(0);          // e_phoff = 0 -> no program header for .o
+
+  ELFHdr_e_shoff_Offset = FH.size();
+  FHOut.outaddr(0);                 // e_shoff = sec hdr table off in bytes
   FHOut.outword(e_flags);           // e_flags = whatever the target wants
 
   FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
@@ -115,14 +132,16 @@ bool ELFWriter::doInitialization(Module &M) {
   FHOut.outhalf(0);                 // e_phnum     = # prog header entries = 0
   FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
 
+  // e_shnum     = # of section header ents
+  ELFHdr_e_shnum_Offset = FH.size();
+  FHOut.outhalf(0);
 
-  ELFHeader_e_shnum_Offset = FH.size();
-  FHOut.outhalf(0);                 // e_shnum     = # of section header ents
-  ELFHeader_e_shstrndx_Offset = FH.size();
-  FHOut.outhalf(0);                 // e_shstrndx  = Section # of '.shstrtab'
+  // e_shstrndx  = Section # of '.shstrtab'
+  ELFHdr_e_shstrndx_Offset = FH.size();
+  FHOut.outhalf(0);
 
   // Add the null section, which is required to be first in the file.
-  getSection("", 0, 0);
+  getSection("", ELFSection::SHT_NULL, 0);
 
   // Start up the symbol table.  The first entry in the symtab is the null
   // entry.
@@ -334,7 +353,7 @@ void ELFWriter::EmitSectionTableStringTable() {
   // Now that we know which section number is the .shstrtab section, update the
   // e_shstrndx entry in the ELF header.
   OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
-  FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+  FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
 
   // Set the NameIdx of each section in the string table and emit the bytes for
   // the string table.
@@ -386,11 +405,11 @@ void ELFWriter::OutputSectionsAndSectionTable() {
   // Now that we know where all of the sections will be emitted, set the e_shnum
   // entry in the ELF header.
   OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
-  FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+  FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset);
 
   // Now that we know the offset in the file of the section table, update the
   // e_shoff address in the ELF header.
-  FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+  FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset);
 
   // Now that we know all of the data in the file header, emit it and all of the
   // sections!
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index 31aa05a9c4a5..0389185f1db4 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -15,6 +15,7 @@
 #define ELFWRITER_H
 
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "ELF.h"
 #include <list>
 #include <map>
 
@@ -82,10 +83,8 @@ namespace llvm {
     /// doInitialization - Emit the file header and all of the global variables
     /// for the module to the ELF file.
     bool doInitialization(Module &M);
-
     bool runOnMachineFunction(MachineFunction &MF);
 
-
     /// doFinalization - Now that the module has been completely processed, emit
     /// the ELF file to 'O'.
     bool doFinalization(Module &M);
@@ -96,53 +95,6 @@ namespace llvm {
     // as well!).
     DataBuffer FileHeader;
 
-    /// ELFSection - This struct contains information about each section that is
-    /// emitted to the file.  This is eventually turned into the section header
-    /// table at the end of the file.
-    struct ELFSection {
-      std::string Name;       // Name of the section.
-      unsigned NameIdx;       // Index in .shstrtab of name, once emitted.
-      unsigned Type;
-      unsigned Flags;
-      uint64_t Addr;
-      unsigned Offset;
-      unsigned Size;
-      unsigned Link;
-      unsigned Info;
-      unsigned Align;
-      unsigned EntSize;
-
-      /// SectionIdx - The number of the section in the Section Table.
-      ///
-      unsigned short SectionIdx;
-
-      /// SectionData - The actual data for this section which we are building
-      /// up for emission to the file.
-      DataBuffer SectionData;
-
-      enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
-             SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
-             SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
-      enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
-      enum {   // SHF - ELF Section Header Flags
-        SHF_WRITE            = 1 << 0, // Writable
-        SHF_ALLOC            = 1 << 1, // Mapped into the process addr space
-        SHF_EXECINSTR        = 1 << 2, // Executable
-        SHF_MERGE            = 1 << 4, // Might be merged if equal
-        SHF_STRINGS          = 1 << 5, // Contains null-terminated strings
-        SHF_INFO_LINK        = 1 << 6, // 'sh_info' contains SHT index
-        SHF_LINK_ORDER       = 1 << 7, // Preserve order after combining
-        SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
-        SHF_GROUP            = 1 << 9, // Section is a member of a group
-        SHF_TLS              = 1 << 10 // Section holds thread-local data
-      };
-
-      ELFSection(const std::string &name)
-        : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
-          Link(0), Info(0), Align(0), EntSize(0) {
-      }
-    };
-
     /// SectionList - This is the list of sections that we have emitted to the
     /// file.  Once the file has been completely built, the section header table
     /// is constructed from this info.
@@ -165,9 +117,15 @@ namespace llvm {
       SN->SectionIdx = NumSections++;
       SN->Type = Type;
       SN->Flags = Flags;
+      SN->Link = ELFSection::SHN_UNDEF;
       return *SN;
     }
 
+    ELFSection &getTextSection() {
+      return getSection(".text", ELFSection::SHT_PROGBITS,
+                        ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
+    }
+
     ELFSection &getDataSection() {
       return getSection(".data", ELFSection::SHT_PROGBITS,
                         ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
@@ -177,34 +135,6 @@ namespace llvm {
                         ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
     }
 
-    /// ELFSym - This struct contains information about each symbol that is
-    /// added to logical symbol table for the module.  This is eventually
-    /// turned into a real symbol table in the file.
-    struct ELFSym {
-      const GlobalValue *GV;    // The global value this corresponds to.
-      unsigned NameIdx;         // Index in .strtab of name, once emitted.
-      uint64_t Value;
-      unsigned Size;
-      unsigned char Info;
-      unsigned char Other;
-      unsigned short SectionIdx;
-
-      enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
-      enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
-             STT_FILE = 4 };
-      ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
-                                      Other(0), SectionIdx(0) {}
-
-      void SetBind(unsigned X) {
-        assert(X == (X & 0xF) && "Bind value out of range!");
-        Info = (Info & 0x0F) | (X << 4);
-      }
-      void SetType(unsigned X) {
-        assert(X == (X & 0xF) && "Type value out of range!");
-        Info = (Info & 0xF0) | X;
-      }
-    };
-
     /// SymbolTable - This is the list of symbols we have emitted to the file.
     /// This actually gets rearranged before emission to the file (to put the
     /// local symbols first in the list).
@@ -214,9 +144,9 @@ namespace llvm {
     // (e.g. the location of the section table).  These members keep track of
     // the offset in ELFHeader of these various pieces to update and other
     // locations in the file.
-    unsigned ELFHeader_e_shoff_Offset;     // e_shoff    in ELF header.
-    unsigned ELFHeader_e_shstrndx_Offset;  // e_shstrndx in ELF header.
-    unsigned ELFHeader_e_shnum_Offset;     // e_shnum    in ELF header.
+    unsigned ELFHdr_e_shoff_Offset;     // e_shoff    in ELF header.
+    unsigned ELFHdr_e_shstrndx_Offset;  // e_shstrndx in ELF header.
+    unsigned ELFHdr_e_shnum_Offset;     // e_shnum    in ELF header.
   private:
     void EmitGlobal(GlobalVariable *GV);
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b8c8563eab45..c35159360e8a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,7 +28,6 @@
 #include "llvm/Support/Streams.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
-#include <ostream>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 85208887f483..804fae55e545 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -40,7 +40,6 @@
 #include <queue>
 #include <memory>
 #include <cmath>
-#include <iostream>
 
 using namespace llvm;
 
@@ -399,7 +398,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
     }
 
     ++NumCoalesce;
-    return SrcReg;
+    return PhysReg;
   }
 
   return Reg;
@@ -543,13 +542,37 @@ void RALinScan::linearScan()
     // Ignore splited live intervals.
     if (!isPhys && vrm_->getPreSplitReg(cur.reg))
       continue;
+
+    // A register defined by an implicit_def can be liveout the def BB and livein
+    // to a use BB. Add it to the livein set of the use BB's.
+    if (!isPhys && cur.empty()) {
+      if (MachineInstr *DefMI = mri_->getVRegDef(cur.reg)) {
+        assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
+        MachineBasicBlock *DefMBB = DefMI->getParent();
+        SmallPtrSet<MachineBasicBlock*, 4> Seen;
+        Seen.insert(DefMBB);
+        for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(cur.reg),
+               re = mri_->reg_end(); ri != re; ++ri) {
+          MachineInstr *UseMI = &*ri;
+          MachineBasicBlock *UseMBB = UseMI->getParent();
+          if (Seen.insert(UseMBB)) {
+            assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                   "Adding a virtual register to livein set?");
+            UseMBB->addLiveIn(Reg);
+          }
+        }
+      }
+    }
     for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
          I != E; ++I) {
       const LiveRange &LR = *I;
       if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
         for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
-          if (LiveInMBBs[i] != EntryMBB)
+          if (LiveInMBBs[i] != EntryMBB) {
+            assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                   "Adding a virtual register to livein set?");
             LiveInMBBs[i]->addLiveIn(Reg);
+          }
         LiveInMBBs.clear();
       }
     }
@@ -1192,7 +1215,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
   // The earliest start of a Spilled interval indicates up to where
   // in handled we need to roll back
   
-  unsigned earliestStart = cur->beginNumber();
   LiveInterval *earliestStartInterval = cur;
 
   // Spill live intervals of virtual regs mapped to the physical register we
@@ -1206,19 +1228,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
     LiveInterval *sli = spillIs.back();
     spillIs.pop_back();
     DOUT << "\t\t\tspilling(a): " << *sli << '\n';
-    earliestStart = std::min(earliestStart, sli->beginNumber());
     earliestStartInterval =
       (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
          earliestStartInterval : sli;
-    
-    if (earliestStartInterval->beginNumber()!=earliestStart) {
-      epicFail |= true;
-      std::cerr << "What the 1 - "
-      		<< "earliestStart = " << earliestStart
-      		<< "earliestStartInterval = " << earliestStartInterval->beginNumber()
-      		<< "\n";
-    }
-   
+       
     std::vector<LiveInterval*> newIs;
     if (!NewSpillFramework) {
       newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
@@ -1229,20 +1242,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
     std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
     spilled.insert(sli->reg);
 
-    if (earliestStartInterval->beginNumber()!=earliestStart) {
-      epicFail |= true;
-      std::cerr << "What the 2 - "
-      		<< "earliestStart = " << earliestStart
-      		<< "earliestStartInterval = " << earliestStartInterval->beginNumber()
-      		<< "\n";
-    }
-
     if (epicFail) {
       //abort();
     }
   }
 
-  earliestStart = earliestStartInterval->beginNumber();
+  unsigned earliestStart = earliestStartInterval->beginNumber();
 
   DOUT << "\t\trolling back to: " << earliestStart << '\n';
 
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4c1710dd81fa..609ec82c5ad1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
   assert(N->getOpcode() == ISD::BUILD_PAIR);
 
-  SDNode *LD1 = getBuildPairElt(N, 0);
-  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
     return SDValue();
   MVT LD1VT = LD1->getValueType(0);
-  SDNode *LD2 = getBuildPairElt(N, 1);
   const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
 
   if (ISD::isNON_EXTLoad(LD2) &&
       LD2->hasOneUse() &&
       // If both are volatile this would reduce the number of volatile loads.
       // If one is volatile it might be ok, but play conservative and bail out.
-      !cast<LoadSDNode>(LD1)->isVolatile() &&
-      !cast<LoadSDNode>(LD2)->isVolatile() &&
+      !LD1->isVolatile() &&
+      !LD2->isVolatile() &&
       TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
-    LoadSDNode *LD = cast<LoadSDNode>(LD1);
-    unsigned Align = LD->getAlignment();
+    unsigned Align = LD1->getAlignment();
     unsigned NewAlign = TLI.getTargetData()->
       getABITypeAlignment(VT.getTypeForMVT());
 
     if (NewAlign <= Align &&
         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
-      return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
-                         LD->getSrcValue(), LD->getSrcValueOffset(),
-                         false, Align);
+      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+                         LD1->getBasePtr(), LD1->getSrcValue(),
+                         LD1->getSrcValueOffset(), false, Align);
   }
 
   return SDValue();
@@ -4019,6 +4018,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   // fold (fmul A, 0) -> 0
   if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
     return N1;
+  // fold (fmul A, 0) -> 0, vector edition.
+  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+    return N1;
   // fold (fmul X, 2.0) -> (fadd X, X)
   if (N1CFP && N1CFP->isExactlyValue(+2.0))
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6becff32176e..4a7dbebe2de8 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -639,18 +639,18 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
 bool
 FastISel::SelectOperator(User *I, unsigned Opcode) {
   switch (Opcode) {
-  case Instruction::Add: {
-    ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD;
-    return SelectBinaryOp(I, Opc);
-  }
-  case Instruction::Sub: {
-    ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
-    return SelectBinaryOp(I, Opc);
-  }
-  case Instruction::Mul: {
-    ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
-    return SelectBinaryOp(I, Opc);
-  }
+  case Instruction::Add:
+    return SelectBinaryOp(I, ISD::ADD);
+  case Instruction::FAdd:
+    return SelectBinaryOp(I, ISD::FADD);
+  case Instruction::Sub:
+    return SelectBinaryOp(I, ISD::SUB);
+  case Instruction::FSub:
+    return SelectBinaryOp(I, ISD::FSUB);
+  case Instruction::Mul:
+    return SelectBinaryOp(I, ISD::MUL);
+  case Instruction::FMul:
+    return SelectBinaryOp(I, ISD::FMUL);
   case Instruction::SDiv:
     return SelectBinaryOp(I, ISD::SDIV);
   case Instruction::UDiv:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2cd67e61907f..5ae183e2fa09 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -116,6 +116,8 @@ private:
   /// result.
   SDValue LegalizeOp(SDValue O);
 
+  SDValue OptimizeFloatStore(StoreSDNode *ST);
+
   /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
   /// insertion index for the INSERT_VECTOR_ELT instruction.  In this case, it
   /// is necessary to spill the vector being inserted into to memory, perform
@@ -165,6 +167,7 @@ private:
   SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandVectorBuildThroughStack(SDNode* Node);
 
   void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -681,6 +684,59 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
   return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
 }
 
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  // FIXME: We shouldn't do this for TargetConstantFP's.
+  // FIXME: move this to the DAG Combiner!  Note that we can't regress due
+  // to phase ordering between legalized code and the dag combiner.  This
+  // probably means that we need to integrate dag combiner and legalizer
+  // together.
+  // We generally can't do this one for long doubles.
+  SDValue Tmp1 = ST->getChain();
+  SDValue Tmp2 = ST->getBasePtr();
+  SDValue Tmp3;
+  int SVOffset = ST->getSrcValueOffset();
+  unsigned Alignment = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  DebugLoc dl = ST->getDebugLoc();
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+    if (CFP->getValueType(0) == MVT::f32 &&
+        getTypeAction(MVT::i32) == Legal) {
+      Tmp3 = DAG.getConstant(CFP->getValueAPF().
+                                      bitcastToAPInt().zextOrTrunc(32),
+                              MVT::i32);
+      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+    } else if (CFP->getValueType(0) == MVT::f64) {
+      // If this target supports 64-bit registers, do a single 64-bit store.
+      if (getTypeAction(MVT::i64) == Legal) {
+        Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                  zextOrTrunc(64), MVT::i64);
+        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+                            SVOffset, isVolatile, Alignment);
+      } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+        // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+        // stores.  If the target supports neither 32- nor 64-bits, this
+        // xform is certainly not worth it.
+        const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
+        SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+        SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+        if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+                          SVOffset, isVolatile, Alignment);
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                            DAG.getIntPtrConstant(4));
+        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+                          isVolatile, MinAlign(Alignment, 4U));
+
+        return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+      }
+    }
+  }
+  return SDValue();
+}
+
 /// LegalizeOp - We know that the specified value has a legal type, and
 /// that its operands are legal.  Now ensure that the operation itself
 /// is legal, recursively ensuring that the operands' operations remain
@@ -1293,50 +1349,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     bool isVolatile = ST->isVolatile();
 
     if (!ST->isTruncatingStore()) {
-      // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
-      // FIXME: We shouldn't do this for TargetConstantFP's.
-      // FIXME: move this to the DAG Combiner!  Note that we can't regress due
-      // to phase ordering between legalized code and the dag combiner.  This
-      // probably means that we need to integrate dag combiner and legalizer
-      // together.
-      // We generally can't do this one for long doubles.
-      if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
-        if (CFP->getValueType(0) == MVT::f32 &&
-            getTypeAction(MVT::i32) == Legal) {
-          Tmp3 = DAG.getConstant(CFP->getValueAPF().
-                                          bitcastToAPInt().zextOrTrunc(32),
-                                  MVT::i32);
-          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                SVOffset, isVolatile, Alignment);
-          break;
-        } else if (CFP->getValueType(0) == MVT::f64) {
-          // If this target supports 64-bit registers, do a single 64-bit store.
-          if (getTypeAction(MVT::i64) == Legal) {
-            Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
-                                     zextOrTrunc(64), MVT::i64);
-            Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                  SVOffset, isVolatile, Alignment);
-            break;
-          } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
-            // Otherwise, if the target supports 32-bit registers, use 2 32-bit
-            // stores.  If the target supports neither 32- nor 64-bits, this
-            // xform is certainly not worth it.
-            const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
-            SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
-            SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
-            if (TLI.isBigEndian()) std::swap(Lo, Hi);
-
-            Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
-                              SVOffset, isVolatile, Alignment);
-            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
-                               DAG.getIntPtrConstant(4));
-            Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
-                              isVolatile, MinAlign(Alignment, 4U));
-
-            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
-            break;
-          }
-        }
+      if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+        Result = SDValue(OptStore, 0);
+        break;
       }
 
       {
@@ -1510,6 +1525,46 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
 }
 
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  // We can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  // Create the stack frame object.
+  MVT VT = Node->getValueType(0);
+  MVT OpVT = Node->getOperand(0).getValueType();
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    unsigned Offset = TypeByteSize*i;
+
+    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+                                  Idx, SV, Offset));
+  }
+
+  SDValue StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
 SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
   DebugLoc dl = Node->getDebugLoc();
   SDValue Tmp1 = Node->getOperand(0);
@@ -1853,40 +1908,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
     }
   }
 
-  // Otherwise, we can't handle this case efficiently.  Allocate a sufficiently
-  // aligned object on the stack, store each element into it, then load
-  // the result as a vector.
-  // Create the stack frame object.
-  SDValue FIPtr = DAG.CreateStackTemporary(VT);
-  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
-  const Value *SV = PseudoSourceValue::getFixedStack(FI);
-
-  // Emit a store of each element to the stack slot.
-  SmallVector<SDValue, 8> Stores;
-  unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
-  // Store (in the right endianness) the elements to memory.
-  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
-    // Ignore undef elements.
-    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
-
-    unsigned Offset = TypeByteSize*i;
-
-    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
-    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
-
-    Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
-                                  Idx, SV, Offset));
-  }
-
-  SDValue StoreChain;
-  if (!Stores.empty())    // Not all undef elements?
-    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                             &Stores[0], Stores.size());
-  else
-    StoreChain = DAG.getEntryNode();
-
-  // Result is a load from the stack slot.
-  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+  // Otherwise, we can't handle this case efficiently.
+  return ExpandVectorBuildThroughStack(Node);
 }
 
 // ExpandLibCall - Expand a node into a call to a libcall.  If the result value
@@ -2437,23 +2460,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
     break;
   case ISD::CONCAT_VECTORS: {
-    // Use extract/insert/build vector for now. We might try to be
-    // more clever later.
-    SmallVector<SDValue, 8> Ops;
-    unsigned NumOperands = Node->getNumOperands();
-    for (unsigned i=0; i < NumOperands; ++i) {
-      SDValue SubOp = Node->getOperand(i);
-      MVT VVT = SubOp.getNode()->getValueType(0);
-      MVT EltVT = VVT.getVectorElementType();
-      unsigned NumSubElem = VVT.getVectorNumElements();
-      for (unsigned j=0; j < NumSubElem; ++j) {
-        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
-                                  DAG.getIntPtrConstant(j)));
-      }
-    }
-    Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
-                       &Ops[0], Ops.size());
-    Results.push_back(Tmp1);
+    Results.push_back(ExpandVectorBuildThroughStack(Node));
     break;
   }
   case ISD::SCALAR_TO_VECTOR:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index eb9342cc8b8e..0c826f67c24a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -356,13 +356,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
   unsigned NewOpc = N->getOpcode();
   DebugLoc dl = N->getDebugLoc();
 
-  // If we're promoting a UINT to a larger size, check to see if the new node
-  // will be legal.  If it isn't, check to see if FP_TO_SINT is legal, since
-  // we can use that instead.  This allows us to generate better code for
-  // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
-  // legal, such as PowerPC.
+  // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+  // not Legal, check to see if we can use FP_TO_SINT instead.  (If both UINT
+  // and SINT conversions are Custom, there is no way to tell which is preferable.
+  // We choose SINT because that's the right thing on PPC.)  
   if (N->getOpcode() == ISD::FP_TO_UINT &&
-      !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) &&
+      !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     NewOpc = ISD::FP_TO_SINT;
 
@@ -1747,7 +1746,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
 
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-  if (VT == MVT::i32)
+  if (VT == MVT::i16)
+    LC = RTLIB::SDIV_I16;
+  else if (VT == MVT::i32)
     LC = RTLIB::SDIV_I32;
   else if (VT == MVT::i64)
     LC = RTLIB::SDIV_I64;
@@ -1909,7 +1910,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
 
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-  if (VT == MVT::i32)
+  if (VT == MVT::i16)
+    LC = RTLIB::SREM_I16;
+  else if (VT == MVT::i32)
     LC = RTLIB::SREM_I32;
   else if (VT == MVT::i64)
     LC = RTLIB::SREM_I64;
@@ -1938,7 +1941,9 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
 
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-  if (VT == MVT::i32)
+  if (VT == MVT::i16)
+    LC = RTLIB::UDIV_I16;
+  else if (VT == MVT::i32)
     LC = RTLIB::UDIV_I32;
   else if (VT == MVT::i64)
     LC = RTLIB::UDIV_I64;
@@ -1956,7 +1961,9 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
 
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-  if (VT == MVT::i32)
+  if (VT == MVT::i16)
+    LC = RTLIB::UREM_I16;
+  else if (VT == MVT::i32)
     LC = RTLIB::UREM_I32;
   else if (VT == MVT::i64)
     LC = RTLIB::UREM_I64;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index df9af2147ca5..335c73cd5964 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -129,6 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   if (!HasVectorValue)
     return TranslateLegalizeResults(Op, Result);
 
+  MVT QueryType;
   switch (Op.getOpcode()) {
   default:
     return TranslateLegalizeResults(Op, Result);
@@ -162,8 +163,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::ANY_EXTEND:
   case ISD::TRUNCATE:
   case ISD::SIGN_EXTEND:
-  case ISD::SINT_TO_FP:
-  case ISD::UINT_TO_FP:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::FNEG:
@@ -183,10 +182,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
+    QueryType = Node->getValueType(0);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    QueryType = Node->getOperand(0).getValueType();
     break;
   }
 
-  switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
   case TargetLowering::Promote:
     // "Promote" the operation by bitcasting
     Result = PromoteVectorOp(Op);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 195896ee89dc..a9adce8fdc5d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -154,7 +154,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   // Do not accept an all-undef vector.
   if (i == e) return false;
 
-  // Do not accept build_vectors that aren't all constants or which have non-~0
+  // Do not accept build_vectors that aren't all constants or which have non-0
   // elements.
   SDValue Zero = N->getOperand(i);
   if (isa<ConstantSDNode>(Zero)) {
@@ -166,7 +166,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   } else
     return false;
 
-  // Okay, we have at least one ~0 value, check to see if the rest match or are
+  // Okay, we have at least one 0 value, check to see if the rest match or are
   // undefs.
   for (++i; i != e; ++i)
     if (N->getOperand(i) != Zero &&
@@ -2807,16 +2807,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
     case ISD::ADDC:
     case ISD::ADDE:
     case ISD::SUB:
-    case ISD::FADD:
-    case ISD::FSUB:
-    case ISD::FMUL:
-    case ISD::FDIV:
-    case ISD::FREM:
     case ISD::UDIV:
     case ISD::SDIV:
     case ISD::UREM:
     case ISD::SREM:
       return N2;       // fold op(arg1, undef) -> undef
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+      if (UnsafeFPMath)
+        return N2;
+      break;
     case ISD::MUL:
     case ISD::AND:
     case ISD::SRL:
@@ -3059,7 +3062,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
   isSrcStr = isMemSrcFromString(Src, Str);
   bool isSrcConst = isa<ConstantSDNode>(Src);
   bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
-  MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+  MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
   if (VT != MVT::iAny) {
     unsigned NewAlign = (unsigned)
       TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 889d7f5dd934..93750d6b98c4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -842,20 +842,6 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
   }
 }
 
-void SelectionDAGLowering::visitAdd(User &I) {
-  if (I.getType()->isFPOrFPVector())
-    visitBinary(I, ISD::FADD);
-  else
-    visitBinary(I, ISD::ADD);
-}
-
-void SelectionDAGLowering::visitMul(User &I) {
-  if (I.getType()->isFPOrFPVector())
-    visitBinary(I, ISD::FMUL);
-  else
-    visitBinary(I, ISD::MUL);
-}
-
 SDValue SelectionDAGLowering::getValue(const Value *V) {
   SDValue &N = NodeMap[V];
   if (N.getNode()) return N;
@@ -2161,37 +2147,33 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
 }
 
 
-void SelectionDAGLowering::visitSub(User &I) {
+void SelectionDAGLowering::visitFSub(User &I) {
   // -0.0 - X --> fneg
   const Type *Ty = I.getType();
   if (isa<VectorType>(Ty)) {
     if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
       const VectorType *DestTy = cast<VectorType>(I.getType());
       const Type *ElTy = DestTy->getElementType();
-      if (ElTy->isFloatingPoint()) {
-        unsigned VL = DestTy->getNumElements();
-        std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
-        Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
-        if (CV == CNZ) {
-          SDValue Op2 = getValue(I.getOperand(1));
-          setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                   Op2.getValueType(), Op2));
-          return;
-        }
-      }
-    }
-  }
-  if (Ty->isFloatingPoint()) {
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
-      if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+      unsigned VL = DestTy->getNumElements();
+      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+      if (CV == CNZ) {
         SDValue Op2 = getValue(I.getOperand(1));
         setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
                                  Op2.getValueType(), Op2));
         return;
       }
+    }
   }
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+      SDValue Op2 = getValue(I.getOperand(1));
+      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                               Op2.getValueType(), Op2));
+      return;
+    }
 
-  visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+  visitBinary(I, ISD::FSUB);
 }
 
 void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index 578aa591ce67..057c8410da0e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -469,9 +469,12 @@ private:
 
   void visitBinary(User &I, unsigned OpCode);
   void visitShift(User &I, unsigned Opcode);
-  void visitAdd(User &I);
-  void visitSub(User &I);
-  void visitMul(User &I);
+  void visitAdd(User &I)  { visitBinary(I, ISD::ADD); }
+  void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
+  void visitSub(User &I)  { visitBinary(I, ISD::SUB); }
+  void visitFSub(User &I);
+  void visitMul(User &I)  { visitBinary(I, ISD::MUL); }
+  void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
   void visitURem(User &I) { visitBinary(I, ISD::UREM); }
   void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
   void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3334e53f0fbc..ab4cd515531c 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
 }
 
 
-/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-/// location that the 'Base' load is loading from.
-bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
-                                       unsigned Bytes, int Dist,
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+/// location that is 'Dist' units away from the location that the 'Base' load 
+/// is loading from.
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
+                                       unsigned Bytes, int Dist, 
                                        const MachineFrameInfo *MFI) const {
-  if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+  if (LD->getChain() != Base->getChain())
     return false;
   MVT VT = LD->getValueType(0);
   if (VT.getSizeInBits() / 8 != Bytes)
@@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
     if (FS != BFS || FS != (int)Bytes) return false;
     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
   }
+  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+    if (V && (V->getSExtValue() == Dist*Bytes))
+      return true;
+  }
 
   GlobalValue *GV1 = NULL;
   GlobalValue *GV2 = NULL;
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index c31f622e249e..bd6584a53c12 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -33,99 +33,21 @@ STATISTIC(NumSUnfold , "Number of stores unfolded");
 STATISTIC(NumModRefUnfold, "Number of modref unfolded");
 
 namespace {
-  enum RewriterName { simple, local, trivial };
+  enum RewriterName { local, trivial };
 }
 
 static cl::opt<RewriterName>
 RewriterOpt("rewriter",
             cl::desc("Rewriter to use: (default: local)"),
             cl::Prefix,
-            cl::values(clEnumVal(simple,  "simple rewriter"),
-                       clEnumVal(local,   "local rewriter"),
+            cl::values(clEnumVal(local,   "local rewriter"),
                        clEnumVal(trivial, "trivial rewriter"),
                        clEnumValEnd),
             cl::init(local));
 
 VirtRegRewriter::~VirtRegRewriter() {}
 
- 
-// ****************************** //
-// Simple Spiller Implementation  //
-// ****************************** //
-
-struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
-
-  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
-                            LiveIntervals* LIs) {
-    DOUT << "********** REWRITE MACHINE CODE **********\n";
-    DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
-    const TargetMachine &TM = MF.getTarget();
-    const TargetInstrInfo &TII = *TM.getInstrInfo();
-    const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
-
-
-    // LoadedRegs - Keep track of which vregs are loaded, so that we only load
-    // each vreg once (in the case where a spilled vreg is used by multiple
-    // operands).  This is always smaller than the number of operands to the
-    // current machine instr, so it should be small.
-    std::vector<unsigned> LoadedRegs;
-
-    for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
-         MBBI != E; ++MBBI) {
-      DOUT << MBBI->getBasicBlock()->getName() << ":\n";
-      MachineBasicBlock &MBB = *MBBI;
-      for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
-           MII != E; ++MII) {
-        MachineInstr &MI = *MII;
-        for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-          MachineOperand &MO = MI.getOperand(i);
-          if (MO.isReg() && MO.getReg()) {
-            if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-              unsigned VirtReg = MO.getReg();
-              unsigned SubIdx = MO.getSubReg();
-              unsigned PhysReg = VRM.getPhys(VirtReg);
-              unsigned RReg = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
-              if (!VRM.isAssignedReg(VirtReg)) {
-                int StackSlot = VRM.getStackSlot(VirtReg);
-                const TargetRegisterClass* RC = 
-                                             MF.getRegInfo().getRegClass(VirtReg);
-                
-                if (MO.isUse() &&
-                    std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
-                             == LoadedRegs.end()) {
-                  TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
-                  MachineInstr *LoadMI = prior(MII);
-                  VRM.addSpillSlotUse(StackSlot, LoadMI);
-                  LoadedRegs.push_back(VirtReg);
-                  ++NumLoads;
-                  DOUT << '\t' << *LoadMI;
-                }
-
-                if (MO.isDef()) {
-                  TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,   
-                                          StackSlot, RC);
-                  MachineInstr *StoreMI = next(MII);
-                  VRM.addSpillSlotUse(StackSlot, StoreMI);
-                  ++NumStores;
-                }
-              }
-              MF.getRegInfo().setPhysRegUsed(RReg);
-              MI.getOperand(i).setReg(RReg);
-              MI.getOperand(i).setSubReg(0);
-            } else {
-              MF.getRegInfo().setPhysRegUsed(MO.getReg());
-            }
-          }
-        }
-
-        DOUT << '\t' << MI;
-        LoadedRegs.clear();
-      }
-    }
-    return true;
-  }
 
-};
  
 /// This class is intended for use with the new spilling framework only. It
 /// rewrites vreg def/uses to use the assigned preg, but does not insert any
@@ -2231,8 +2153,6 @@ llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
   default: assert(0 && "Unreachable!");
   case local:
     return new LocalRewriter();
-  case simple:
-    return new SimpleRewriter();
   case trivial:
     return new TrivialRewriter();
   }
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 29a05bbbdb64..a80513f3df9e 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -573,8 +573,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       return GV;
     }
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::UDiv:
     case Instruction::SDiv:
     case Instruction::URem:
@@ -605,11 +608,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       case Type::FloatTyID:
         switch (CE->getOpcode()) {
           default: assert(0 && "Invalid float opcode"); abort();
-          case Instruction::Add:  
+          case Instruction::FAdd:
             GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
-          case Instruction::Sub:  
+          case Instruction::FSub:
             GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
-          case Instruction::Mul:  
+          case Instruction::FMul:
             GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
           case Instruction::FDiv: 
             GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
@@ -620,11 +623,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       case Type::DoubleTyID:
         switch (CE->getOpcode()) {
           default: assert(0 && "Invalid double opcode"); abort();
-          case Instruction::Add:  
+          case Instruction::FAdd:
             GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
-          case Instruction::Sub:  
+          case Instruction::FSub:
             GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
-          case Instruction::Mul:  
+          case Instruction::FMul:
             GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
           case Instruction::FDiv: 
             GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
@@ -638,15 +641,15 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
         APFloat apfLHS = APFloat(LHS.IntVal);
         switch (CE->getOpcode()) {
           default: assert(0 && "Invalid long double opcode"); abort();
-          case Instruction::Add:  
+          case Instruction::FAdd:
             apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
-          case Instruction::Sub:  
+          case Instruction::FSub:
             apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
-          case Instruction::Mul:  
+          case Instruction::FMul:
             apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 765fed248f98..7dfeae0ab2eb 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -64,45 +64,35 @@ void Interpreter::initializeExecutionEngine() {
      Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
      break
 
-#define IMPLEMENT_INTEGER_BINOP1(OP, TY) \
-   case Type::IntegerTyID: { \
-     Dest.IntVal = Src1.IntVal OP Src2.IntVal; \
-     break; \
-   }
-
-
-static void executeAddInst(GenericValue &Dest, GenericValue Src1, 
-                           GenericValue Src2, const Type *Ty) {
+static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
+                            GenericValue Src2, const Type *Ty) {
   switch (Ty->getTypeID()) {
-    IMPLEMENT_INTEGER_BINOP1(+, Ty);
     IMPLEMENT_BINARY_OPERATOR(+, Float);
     IMPLEMENT_BINARY_OPERATOR(+, Double);
   default:
-    cerr << "Unhandled type for Add instruction: " << *Ty << "\n";
+    cerr << "Unhandled type for FAdd instruction: " << *Ty << "\n";
     abort();
   }
 }
 
-static void executeSubInst(GenericValue &Dest, GenericValue Src1, 
-                           GenericValue Src2, const Type *Ty) {
+static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
+                            GenericValue Src2, const Type *Ty) {
   switch (Ty->getTypeID()) {
-    IMPLEMENT_INTEGER_BINOP1(-, Ty);
     IMPLEMENT_BINARY_OPERATOR(-, Float);
     IMPLEMENT_BINARY_OPERATOR(-, Double);
   default:
-    cerr << "Unhandled type for Sub instruction: " << *Ty << "\n";
+    cerr << "Unhandled type for FSub instruction: " << *Ty << "\n";
     abort();
   }
 }
 
-static void executeMulInst(GenericValue &Dest, GenericValue Src1, 
-                           GenericValue Src2, const Type *Ty) {
+static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
+                            GenericValue Src2, const Type *Ty) {
   switch (Ty->getTypeID()) {
-    IMPLEMENT_INTEGER_BINOP1(*, Ty);
     IMPLEMENT_BINARY_OPERATOR(*, Float);
     IMPLEMENT_BINARY_OPERATOR(*, Double);
   default:
-    cerr << "Unhandled type for Mul instruction: " << *Ty << "\n";
+    cerr << "Unhandled type for FMul instruction: " << *Ty << "\n";
     abort();
   }
 }
@@ -550,11 +540,14 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
   GenericValue R;   // Result
 
   switch (I.getOpcode()) {
-  case Instruction::Add:   executeAddInst  (R, Src1, Src2, Ty); break;
-  case Instruction::Sub:   executeSubInst  (R, Src1, Src2, Ty); break;
-  case Instruction::Mul:   executeMulInst  (R, Src1, Src2, Ty); break;
-  case Instruction::FDiv:  executeFDivInst (R, Src1, Src2, Ty); break;
-  case Instruction::FRem:  executeFRemInst (R, Src1, Src2, Ty); break;
+  case Instruction::Add:   R.IntVal = Src1.IntVal + Src2.IntVal; break;
+  case Instruction::Sub:   R.IntVal = Src1.IntVal - Src2.IntVal; break;
+  case Instruction::Mul:   R.IntVal = Src1.IntVal * Src2.IntVal; break;
+  case Instruction::FAdd:  executeFAddInst(R, Src1, Src2, Ty); break;
+  case Instruction::FSub:  executeFSubInst(R, Src1, Src2, Ty); break;
+  case Instruction::FMul:  executeFMulInst(R, Src1, Src2, Ty); break;
+  case Instruction::FDiv:  executeFDivInst(R, Src1, Src2, Ty); break;
+  case Instruction::FRem:  executeFRemInst(R, Src1, Src2, Ty); break;
   case Instruction::UDiv:  R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
   case Instruction::SDiv:  R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
   case Instruction::URem:  R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
@@ -1258,18 +1251,21 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
   GenericValue Dest;
   const Type * Ty = CE->getOperand(0)->getType();
   switch (CE->getOpcode()) {
-  case Instruction::Add:  executeAddInst (Dest, Op0, Op1, Ty); break;
-  case Instruction::Sub:  executeSubInst (Dest, Op0, Op1, Ty); break;
-  case Instruction::Mul:  executeMulInst (Dest, Op0, Op1, Ty); break;
+  case Instruction::Add:  Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
+  case Instruction::Sub:  Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
+  case Instruction::Mul:  Dest.IntVal = Op0.IntVal * Op1.IntVal; break;
+  case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break;
+  case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break;
+  case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break;
   case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
   case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
   case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
   case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
   case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
   case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
-  case Instruction::And:  Dest.IntVal = Op0.IntVal.And(Op1.IntVal); break;
-  case Instruction::Or:   Dest.IntVal = Op0.IntVal.Or(Op1.IntVal); break;
-  case Instruction::Xor:  Dest.IntVal = Op0.IntVal.Xor(Op1.IntVal); break;
+  case Instruction::And:  Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
+  case Instruction::Or:   Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
+  case Instruction::Xor:  Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
   case Instruction::Shl:  
     Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
     break;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 89131a0dde7f..43f23e443426 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -551,7 +551,7 @@ namespace {
 
     // When outputting a function stub in the context of some other function, we
     // save BufferBegin/BufferEnd/CurBufferPtr here.
-    unsigned char *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+    uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
 
     /// Relocations - These are the relocations that the function needs, as
     /// emitted.
@@ -891,8 +891,11 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
       break;
     }
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::UDiv:
     case Instruction::SDiv:
     case Instruction::URem:
@@ -1056,11 +1059,11 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
   
   // FnStart is the start of the text, not the start of the constant pool and
   // other per-function data.
-  unsigned char *FnStart =
-    (unsigned char *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+  uint8_t *FnStart =
+    (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
 
   // FnEnd is the end of the function's machine code.
-  unsigned char *FnEnd = CurBufferPtr;
+  uint8_t *FnEnd = CurBufferPtr;
 
   if (!Relocations.empty()) {
     CurFn = F.getFunction();
@@ -1183,7 +1186,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
     } else {
       DOUT << "JIT: Binary code:\n";
       DOUT << std::hex;
-      unsigned char* q = FnStart;
+      uint8_t* q = FnStart;
       for (int i = 0; q < FnEnd; q += 4, ++i) {
         if (i == 4)
           i = 0;
@@ -1221,7 +1224,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
     BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
                                                              ActualSize);
     BufferEnd = BufferBegin+ActualSize;
-    unsigned char* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
+    uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
     MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
                               FrameRegister);
     BufferBegin = SavedBufferBegin;
@@ -1416,7 +1419,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
   SavedBufferEnd = BufferEnd;
   SavedCurBufferPtr = CurBufferPtr;
   
-  BufferBegin = CurBufferPtr = (unsigned char *)Buffer;
+  BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
   BufferEnd = BufferBegin+StubSize+1;
 }
 
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 2819b6d4653b..70ccdccb8049 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -257,9 +257,9 @@ namespace {
     // When emitting code into a memory block, this is the block.
     MemoryRangeHeader *CurBlock;
     
-    unsigned char *CurStubPtr, *StubBase;
-    unsigned char *GOTBase;      // Target Specific reserved memory
-    void *DlsymTable;            // Stub external symbol information
+    uint8_t *CurStubPtr, *StubBase;
+    uint8_t *GOTBase;     // Target Specific reserved memory
+    void *DlsymTable;     // Stub external symbol information
 
     // Centralize memory block allocation.
     sys::MemoryBlock getNewMemoryBlock(unsigned size);
@@ -273,12 +273,12 @@ namespace {
     void AllocateGOT();
     void SetDlsymTable(void *);
     
-    unsigned char *allocateStub(const GlobalValue* F, unsigned StubSize,
-                                unsigned Alignment);
+    uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+                          unsigned Alignment);
     
     /// startFunctionBody - When a function starts, allocate a block of free
     /// executable memory, returning a pointer to it and its actual size.
-    unsigned char *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+    uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
       
       FreeRangeHeader* candidateBlock = FreeMemoryList;
       FreeRangeHeader* head = FreeMemoryList;
@@ -301,18 +301,18 @@ namespace {
       // Allocate the entire memory block.
       FreeMemoryList = candidateBlock->AllocateBlock();
       ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader);
-      return (unsigned char *)(CurBlock+1);
+      return (uint8_t *)(CurBlock+1);
     }
     
     /// endFunctionBody - The function F is now allocated, and takes the memory
     /// in the range [FunctionStart,FunctionEnd).
-    void endFunctionBody(const Function *F, unsigned char *FunctionStart,
-                         unsigned char *FunctionEnd) {
+    void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                         uint8_t *FunctionEnd) {
       assert(FunctionEnd > FunctionStart);
-      assert(FunctionStart == (unsigned char *)(CurBlock+1) &&
+      assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
              "Mismatched function start/end!");
 
-      uintptr_t BlockSize = FunctionEnd - (unsigned char *)CurBlock;
+      uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
       FunctionBlocks[F] = CurBlock;
 
       // Release the memory at the end of this block that isn't needed.
@@ -320,17 +320,17 @@ namespace {
     }
 
     /// allocateSpace - Allocate a memory block of the given size.
-    unsigned char *allocateSpace(intptr_t Size, unsigned Alignment) {
+    uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
       CurBlock = FreeMemoryList;
       FreeMemoryList = FreeMemoryList->AllocateBlock();
 
-      unsigned char *result = (unsigned char *)CurBlock+1;
+      uint8_t *result = (uint8_t *)CurBlock+1;
 
       if (Alignment == 0) Alignment = 1;
-      result = (unsigned char*)(((intptr_t)result+Alignment-1) &
+      result = (uint8_t*)(((intptr_t)result+Alignment-1) &
                ~(intptr_t)(Alignment-1));
 
-      uintptr_t BlockSize = result + Size - (unsigned char *)CurBlock;
+      uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
       FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
 
       return result;
@@ -338,28 +338,26 @@ namespace {
 
     /// startExceptionTable - Use startFunctionBody to allocate memory for the 
     /// function's exception table.
-    unsigned char* startExceptionTable(const Function* F, 
-                                       uintptr_t &ActualSize) {
+    uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
       return startFunctionBody(F, ActualSize);
     }
 
     /// endExceptionTable - The exception table of F is now allocated, 
     /// and takes the memory in the range [TableStart,TableEnd).
-    void endExceptionTable(const Function *F, unsigned char *TableStart,
-                           unsigned char *TableEnd, 
-                           unsigned char* FrameRegister) {
+    void endExceptionTable(const Function *F, uint8_t *TableStart,
+                           uint8_t *TableEnd, uint8_t* FrameRegister) {
       assert(TableEnd > TableStart);
-      assert(TableStart == (unsigned char *)(CurBlock+1) &&
+      assert(TableStart == (uint8_t *)(CurBlock+1) &&
              "Mismatched table start/end!");
       
-      uintptr_t BlockSize = TableEnd - (unsigned char *)CurBlock;
+      uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
       TableBlocks[F] = CurBlock;
 
       // Release the memory at the end of this block that isn't needed.
       FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
     }
     
-    unsigned char *getGOTBase() const {
+    uint8_t *getGOTBase() const {
       return GOTBase;
     }
     
@@ -433,7 +431,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
   sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20);
 #endif
 
-  unsigned char *MemBase = static_cast<unsigned char*>(MemBlock.base());
+  uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base());
 
   // Allocate stubs backwards from the base, allocate functions forward
   // from the base.
@@ -492,7 +490,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
 
 void DefaultJITMemoryManager::AllocateGOT() {
   assert(GOTBase == 0 && "Cannot allocate the got multiple times");
-  GOTBase = new unsigned char[sizeof(void*) * 8192];
+  GOTBase = new uint8_t[sizeof(void*) * 8192];
   HasGOT = true;
 }
 
@@ -508,12 +506,12 @@ DefaultJITMemoryManager::~DefaultJITMemoryManager() {
   Blocks.clear();
 }
 
-unsigned char *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
+uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
                                                      unsigned StubSize,
                                                      unsigned Alignment) {
   CurStubPtr -= StubSize;
-  CurStubPtr = (unsigned char*)(((intptr_t)CurStubPtr) &
-                                ~(intptr_t)(Alignment-1));
+  CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) &
+                          ~(intptr_t)(Alignment-1));
   if (CurStubPtr < StubBase) {
     // FIXME: allocate a new block
     fprintf(stderr, "JIT ran out of memory for function stubs!\n");
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 6ac37bc840c6..42e6fda97baf 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
 #include "llvm/System/Program.h"
+#include "llvm/System/Process.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
@@ -301,6 +302,35 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
   return pos;  
 }
 
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+                                         bool bg) {
+  if (sys::Process::ColorNeedsFlush())
+    flush();
+  const char *colorcode =
+    (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+    : sys::Process::OutputColor(colors, bold, bg);
+  if (colorcode) {
+    unsigned len = strlen(colorcode);
+    write(colorcode, len);
+    // don't account colors towards output characters
+    pos -= len;
+  }
+  return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+  if (sys::Process::ColorNeedsFlush())
+    flush();
+  const char *colorcode = sys::Process::ResetColor();
+  if (colorcode) {
+    unsigned len = strlen(colorcode);
+    write(colorcode, len);
+    // don't account colors towards output characters
+    pos -= len;
+  }
+  return *this;
+}
+
 //===----------------------------------------------------------------------===//
 //  raw_stdout/err_ostream
 //===----------------------------------------------------------------------===//
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
index 74b9bb8b142a..2da31c9f215b 100644
--- a/lib/System/Unix/Process.inc
+++ b/lib/System/Unix/Process.inc
@@ -235,3 +235,62 @@ unsigned Process::StandardErrColumns() {
 
   return getColumns(2);
 }
+
+static bool terminalHasColors() {
+  if (const char *term = std::getenv("TERM")) {
+    // Most modern terminals support ANSI escape sequences for colors.
+    // We could check terminfo, or have a list of known terms that support
+    // colors, but that would be overkill.
+    // The user can always ask for no colors by setting TERM to dumb, or
+    // using a commandline flag.
+    return strcmp(term, "dumb") != 0;
+  }
+  return false;
+}
+
+bool Process::StandardOutHasColors() {
+  if (!StandardOutIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+  if (!StandardErrIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+  // No, we use ANSI escape sequences.
+  return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+    COLOR(FGBG, "0", BOLD),\
+    COLOR(FGBG, "1", BOLD),\
+    COLOR(FGBG, "2", BOLD),\
+    COLOR(FGBG, "3", BOLD),\
+    COLOR(FGBG, "4", BOLD),\
+    COLOR(FGBG, "5", BOLD),\
+    COLOR(FGBG, "6", BOLD),\
+    COLOR(FGBG, "7", BOLD)\
+  }
+
+static const char* colorcodes[2][2][8] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+  return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+  return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+  return "\033[0m";
+}
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
index e1d7a9222f75..cfbe33c85a2f 100644
--- a/lib/System/Win32/Process.inc
+++ b/lib/System/Win32/Process.inc
@@ -147,4 +147,71 @@ unsigned Process::StandardErrColumns() {
   return Columns;
 }
 
+// It always has colors.
+bool Process::StandardErrHasColors() {
+  return StandardErrIsDisplayed();
+}
+
+bool Process::StandardOutHasColors() {
+  return StandardOutIsDisplayed();
+}
+
+namespace {
+class DefaultColors
+{
+  private:
+    WORD defaultColor;
+  public:
+    DefaultColors()
+     :defaultColor(GetCurrentColor()) {}
+    static unsigned GetCurrentColor() {
+      CONSOLE_SCREEN_BUFFER_INFO csbi;
+      if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+        return csbi.wAttributes;
+      return 0;
+    }
+    WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+  return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+  WORD colors = DefaultColors::GetCurrentColor();
+  if (bg)
+    colors |= BACKGROUND_INTENSITY;
+  else
+    colors |= FOREGROUND_INTENSITY;
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+  return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+  WORD colors;
+  if (bg) {
+    colors = ((code&1) ? BACKGROUND_RED : 0) |
+      ((code&2) ? BACKGROUND_GREEN : 0 ) |
+      ((code&4) ? BACKGROUND_BLUE : 0);
+    if (bold)
+      colors |= BACKGROUND_INTENSITY;
+  } else {
+    colors = ((code&1) ? FOREGROUND_RED : 0) |
+      ((code&2) ? FOREGROUND_GREEN : 0 ) |
+      ((code&4) ? FOREGROUND_BLUE : 0);
+    if (bold)
+      colors |= FOREGROUND_INTENSITY;
+  }
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+  return 0;
+}
+
+const char *Process::ResetColor() {
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+  return 0;
+}
+
 }
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 3a038c9a8c37..a75ed3bd5339 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,7 +17,6 @@
 #include "llvm/Type.h"
 #include "llvm/Support/Streams.h"
 #include "llvm/Support/raw_ostream.h"
-#include <ostream>
 using namespace llvm;
 
 ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 09b8ce07d222..963ff0def648 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -64,11 +64,15 @@ namespace {
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
 
-    SmallVector<MachineBasicBlock::iterator, 4>
-    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
-                 int Opcode, unsigned Size,
-                 ARMCC::CondCodes Pred, unsigned PredReg,
-                 unsigned Scratch, MemOpQueue &MemOps);
+    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                  int Offset, unsigned Base, bool BaseKill, int Opcode,
+                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+                      int Opcode, unsigned Size,
+                      ARMCC::CondCodes Pred, unsigned PredReg,
+                      unsigned Scratch, MemOpQueue &MemOps,
+                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
 
     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
@@ -108,16 +112,16 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
   return 0;
 }
 
-/// mergeOps - Create and insert a LDM or STM with Base as base register and
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
 /// registers in Regs as the register operands that would be loaded / stored.
 /// It returns true if the transformation is done. 
-static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                     int Offset, unsigned Base, bool BaseKill, int Opcode,
-                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
-                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
-                     const TargetInstrInfo *TII) {
-  // FIXME would it be better to take a DL from one of the loads arbitrarily?
-  DebugLoc dl = DebugLoc::getUnknownLoc();
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          int Offset, unsigned Base, bool BaseKill,
+                          int Opcode, ARMCC::CondCodes Pred,
+                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
+                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
   // Only a single register to load / store. Don't bother.
   unsigned NumRegs = Regs.size();
   if (NumRegs <= 1)
@@ -185,20 +189,21 @@ static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 
 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
 /// load / store multiple instructions.
-SmallVector<MachineBasicBlock::iterator, 4>
+void
 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
-                              unsigned Base, int Opcode, unsigned Size,
-                              ARMCC::CondCodes Pred, unsigned PredReg,
-                              unsigned Scratch, MemOpQueue &MemOps) {
-  SmallVector<MachineBasicBlock::iterator, 4> Merges;
+                          unsigned Base, int Opcode, unsigned Size,
+                          ARMCC::CondCodes Pred, unsigned PredReg,
+                          unsigned Scratch, MemOpQueue &MemOps,
+                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
   bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
   int Offset = MemOps[SIndex].Offset;
   int SOffset = Offset;
   unsigned Pos = MemOps[SIndex].Position;
   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
-  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+  DebugLoc dl = Loc->getDebugLoc();
+  unsigned PReg = Loc->getOperand(0).getReg();
   unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
-  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+  bool isKill = Loc->getOperand(0).isKill();
 
   SmallVector<std::pair<unsigned,bool>, 8> Regs;
   Regs.push_back(std::make_pair(PReg, isKill));
@@ -216,18 +221,17 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
       PRegNum = RegNum;
     } else {
       // Can't merge this in. Try merge the earlier ones first.
-      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
-                   Scratch, Regs, TII)) {
+      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+                   Scratch, dl, Regs)) {
         Merges.push_back(prior(Loc));
         for (unsigned j = SIndex; j < i; ++j) {
           MBB.erase(MemOps[j].MBBI);
           MemOps[j].Merged = true;
         }
       }
-      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
-        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
-      Merges.append(Merges2.begin(), Merges2.end());
-      return Merges;
+      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+                   MemOps, Merges);
+      return;
     }
 
     if (MemOps[i].Position > Pos) {
@@ -237,8 +241,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
   }
 
   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
-  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
-               Scratch, Regs, TII)) {
+  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+               Scratch, dl, Regs)) {
     Merges.push_back(prior(Loc));
     for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
       MBB.erase(MemOps[i].MBBI);
@@ -246,7 +250,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
     }
   }
 
-  return Merges;
+  return;
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -530,7 +534,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
     if (isAM2)
       // STR_PRE, STR_POST;
       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
-        .addReg(MO.getReg(), getKillRegState(BaseKill))
+        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
     else
       // FSTMS, FSTMD
@@ -590,6 +594,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   ARMCC::CondCodes CurrPred = ARMCC::AL;
   unsigned CurrPredReg = 0;
   unsigned Position = 0;
+  SmallVector<MachineBasicBlock::iterator,4> Merges;
 
   RS->enterBasicBlock(&MBB);
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
@@ -689,16 +694,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         RS->forward(prior(MBBI));
 
         // Merge ops.
-        SmallVector<MachineBasicBlock::iterator,4> MBBII =
-          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
-                       CurrPred, CurrPredReg, Scratch, MemOps);
+        Merges.clear();
+        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
 
         // Try folding preceeding/trailing base inc/dec into the generated
         // LDM/STM ops.
-        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
-          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+          if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
             ++NumMerges;
-        NumMerges += MBBII.size();
+        NumMerges += Merges.size();
 
         // Try folding preceeding/trailing base inc/dec into those load/store
         // that were not merged to form LDM/STM ops.
@@ -709,6 +714,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
 
         // RS may be pointing to an instruction that's deleted. 
         RS->skipTo(prior(MBBI));
+      } else if (NumMemOps == 1) {
+        // Try folding preceeding/trailing base inc/dec into the single
+        // load/store.
+        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+          ++NumMerges;
+          RS->forward(prior(MBBI));
+        }
       }
 
       CurrBase = 0;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 6662be12a578..0b0e28928288 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -100,7 +100,7 @@ public:
     GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
     JumpTableUId(0), ConstPoolEntryUId(0) {}
 
-  ARMFunctionInfo(MachineFunction &MF) :
+  explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
     Align(isThumb ? 1U : 2U),
     VarArgsRegSaveSize(0), HasStackFrame(false),
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index e8daf7489ebe..b95d1f93aba5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -103,28 +103,28 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
   let MethodBodies = [{
     // FP is R11, R9 is available.
     static const unsigned ARM_GPR_AO_1[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R9, ARM::R10,
       ARM::R11 };
     // FP is R11, R9 is not available.
     static const unsigned ARM_GPR_AO_2[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R10,
       ARM::R11 };
     // FP is R7, R9 is available.
     static const unsigned ARM_GPR_AO_3[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R9, ARM::R10,ARM::R11,
       ARM::R7 };
     // FP is R7, R9 is not available.
     static const unsigned ARM_GPR_AO_4[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R10,ARM::R11,
@@ -186,7 +186,7 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
   // scavenging.
   let MethodBodies = [{
     static const unsigned THUMB_tGPR_AO[] = {
-      ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
 
     // FP is R7, only low registers available.
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
index 47de5dfad94d..8221fc7a7c97 100644
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -33,8 +33,8 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo {
 public:
   AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {}
 
-  AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
-                                                  GlobalRetAddr(0) {}
+  explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+                                                           GlobalRetAddr(0) {}
 
   unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
   void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 4d7b54503e89..5814d2750edc 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -1000,8 +1000,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       Out << ')';
       return;
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::SDiv:
     case Instruction::UDiv:
     case Instruction::FDiv:
@@ -1020,9 +1023,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       bool NeedsClosingParens = printConstExprCast(CE, Static); 
       printConstantWithCast(CE->getOperand(0), CE->getOpcode());
       switch (CE->getOpcode()) {
-      case Instruction::Add: Out << " + "; break;
-      case Instruction::Sub: Out << " - "; break;
-      case Instruction::Mul: Out << " * "; break;
+      case Instruction::Add:
+      case Instruction::FAdd: Out << " + "; break;
+      case Instruction::Sub:
+      case Instruction::FSub: Out << " - "; break;
+      case Instruction::Mul:
+      case Instruction::FMul: Out << " * "; break;
       case Instruction::URem:
       case Instruction::SRem: 
       case Instruction::FRem: Out << " % "; break;
@@ -1322,8 +1328,6 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
   case Instruction::Mul:
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
-    if (!Ty->isIntOrIntVector()) break;
-    // FALL THROUGH
   case Instruction::LShr:
   case Instruction::URem: 
   case Instruction::UDiv: NeedsExplicitCast = true; break;
@@ -1387,8 +1391,6 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
     case Instruction::Mul:
       // We need to cast integer arithmetic so that it is always performed
       // as unsigned, to avoid undefined behavior on overflow.
-      if (!OpTy->isIntOrIntVector()) break;
-      // FALL THROUGH
     case Instruction::LShr:
     case Instruction::UDiv:
     case Instruction::URem:
@@ -1505,8 +1507,6 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
   case Instruction::Mul:
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
-    if (!Ty->isIntOrIntVector()) break;
-    // FALL THROUGH
   case Instruction::LShr:
   case Instruction::URem: 
   case Instruction::UDiv: 
@@ -1552,8 +1552,6 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
     case Instruction::Mul:
       // We need to cast integer arithmetic so that it is always performed
       // as unsigned, to avoid undefined behavior on overflow.
-      if (!OpTy->isIntOrIntVector()) break;
-      // FALL THROUGH
     case Instruction::LShr:
     case Instruction::UDiv:
     case Instruction::URem: // Cast to unsigned first
@@ -2606,6 +2604,10 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     Out << "-(";
     writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
     Out << ")";
+  } else if (BinaryOperator::isFNeg(&I)) {
+    Out << "-(";
+    writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+    Out << ")";
   } else if (I.getOpcode() == Instruction::FRem) {
     // Output a call to fmod/fmodf instead of emitting a%b
     if (I.getType() == Type::FloatTy)
@@ -2630,9 +2632,12 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     writeOperandWithCast(I.getOperand(0), I.getOpcode());
 
     switch (I.getOpcode()) {
-    case Instruction::Add:  Out << " + "; break;
-    case Instruction::Sub:  Out << " - "; break;
-    case Instruction::Mul:  Out << " * "; break;
+    case Instruction::Add:
+    case Instruction::FAdd: Out << " + "; break;
+    case Instruction::Sub:
+    case Instruction::FSub: Out << " - "; break;
+    case Instruction::Mul:
+    case Instruction::FMul: Out << " * "; break;
     case Instruction::URem:
     case Instruction::SRem:
     case Instruction::FRem: Out << " % "; break;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4082989c4c48..04a6829d9cf6 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -865,8 +865,11 @@ namespace {
         Out << "Constant* " << constName << " = ConstantExpr::";
         switch (CE->getOpcode()) {
         case Instruction::Add:    Out << "getAdd(";  break;
+        case Instruction::FAdd:   Out << "getFAdd(";  break;
         case Instruction::Sub:    Out << "getSub("; break;
+        case Instruction::FSub:   Out << "getFSub("; break;
         case Instruction::Mul:    Out << "getMul("; break;
+        case Instruction::FMul:   Out << "getFMul("; break;
         case Instruction::UDiv:   Out << "getUDiv("; break;
         case Instruction::SDiv:   Out << "getSDiv("; break;
         case Instruction::FDiv:   Out << "getFDiv("; break;
@@ -1159,8 +1162,11 @@ namespace {
       break;
     }
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::UDiv:
     case Instruction::SDiv:
     case Instruction::FDiv:
@@ -1176,8 +1182,11 @@ namespace {
       Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
       switch (I->getOpcode()) {
       case Instruction::Add: Out << "Instruction::Add"; break;
+      case Instruction::FAdd: Out << "Instruction::FAdd"; break;
       case Instruction::Sub: Out << "Instruction::Sub"; break;
+      case Instruction::FSub: Out << "Instruction::FSub"; break;
       case Instruction::Mul: Out << "Instruction::Mul"; break;
+      case Instruction::FMul: Out << "Instruction::FMul"; break;
       case Instruction::UDiv:Out << "Instruction::UDiv"; break;
       case Instruction::SDiv:Out << "Instruction::SDiv"; break;
       case Instruction::FDiv:Out << "Instruction::FDiv"; break;
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
index fb930564a9d1..e6254d69d6a0 100644
--- a/lib/Target/IA64/IA64MachineFunctionInfo.h
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -24,7 +24,7 @@ public:
   // by this machinefunction? (used to compute the appropriate
   // entry in the 'alloc' instruction at the top of the
   // machinefunction)
-  IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
+  explicit IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
 
 };
 
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ada851d4f226..37e5b1eccde4 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -1060,12 +1060,15 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
     break;
   // Binary
   case Instruction::Add:
+  case Instruction::FAdd:
     printBinaryInstruction("add",Left,Right);
     break;
   case Instruction::Sub:
+  case Instruction::FSub:
     printBinaryInstruction("sub",Left,Right);
     break;
-  case Instruction::Mul:  
+  case Instruction::Mul:
+  case Instruction::FMul:
     printBinaryInstruction("mul",Left,Right);
     break;
   case Instruction::UDiv:
@@ -1322,12 +1325,15 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
     printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
     break;
   case Instruction::Add:
+  case Instruction::FAdd:
     printBinaryInstruction("add",left,right);
     break;
   case Instruction::Sub:
+  case Instruction::FSub:
     printBinaryInstruction("sub",left,right);
     break;
   case Instruction::Mul:
+  case Instruction::FMul:
     printBinaryInstruction("mul",left,right);
     break;
   case Instruction::UDiv:
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index b94d7e44cace..1d26ae3e667d 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -28,7 +28,8 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
 public:
   MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
 
-  MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+  explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+    : CalleeSavedFrameSize(0) {}
 
   unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
   void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 0f83fd2353a9..ac9a143bc24e 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -46,6 +46,16 @@ static const char *getIntrinsicName(unsigned opcode) {
   case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
   case RTLIB::MUL_I16: Basename = "mul.i16"; break;
   case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+
+  case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
+  case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+  case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
+  case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
+
+  case RTLIB::SREM_I16: Basename = "srem.i16"; break;
+  case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+  case RTLIB::UREM_I16: Basename = "urem.i16"; break;
+  case RTLIB::UREM_I32: Basename = "urem.i32"; break;
   }
   
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -90,6 +100,20 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
   setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
 
+  // Signed division lib call names
+  setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
+  setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+  // Unsigned division lib call names
+  setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
+  setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
+
+  // Signed remainder lib call names
+  setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
+  setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+  // Unsigned remainder lib call names
+  setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
+  setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
+  
   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
   setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
 
@@ -105,6 +129,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setOperationAction(ISD::ADDC,    MVT::i8,  Custom);
   setOperationAction(ISD::SUBE,    MVT::i8,  Custom);
   setOperationAction(ISD::SUBC,    MVT::i8,  Custom);
+  setOperationAction(ISD::SUB,    MVT::i8,  Custom);
   setOperationAction(ISD::ADD,    MVT::i8,  Custom);
   setOperationAction(ISD::ADD,    MVT::i16, Custom);
 
@@ -354,21 +379,11 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
   FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
   // FIXME there isn't really debug info here
   DebugLoc dl = FR->getDebugLoc();
-  // FIXME: Not used.
-  // int Index = FR->getIndex();
 
   // Expand FrameIndex like GlobalAddress and ExternalSymbol
   // Also use Offset field for lo and hi parts. The default 
   // offset is zero.
 
-  /*
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset);
-  return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
-  */
-
   SDValue ES;
   int FrameOffset;
   SDValue FI = SDValue(N,0);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index a7744b8f7a4e..87f8fb0b4e0a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -227,15 +227,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+    // This is just the low 32 bits of a (signed) fp->i64 conversion.
+    // We cannot do this with Promote because i64 is not a legal type.
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 
     // FIXME: disable this lowered code.  This generates 64-bit register values,
     // and we don't model the fact that the top part is clobbered by calls.  We
     // need to flag these together so that the value isn't live across a call.
     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -2858,7 +2857,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
 }
 
 // FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                            DebugLoc dl) {
   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   SDValue Src = Op.getOperand(0);
@@ -2867,9 +2866,11 @@ SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
 
   SDValue Tmp;
   switch (Op.getValueType().getSimpleVT()) {
-  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+  default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
-    Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+                                                         PPCISD::FCTIDZ, 
+                      dl, MVT::f64, Src);
     break;
   case MVT::i64:
     Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
@@ -3740,7 +3741,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
 
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
-  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG,
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                        Op.getDebugLoc());
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -3834,7 +3836,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_TO_SINT:
-    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
   }
 }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 79464749724e..b6d046f2dd5c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -377,7 +377,7 @@ namespace llvm {
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget);
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
-    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 42883d78728e..b359dd33bd0e 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -59,7 +59,7 @@ private:
   bool HasFastCall;
 
 public:
-  PPCFunctionInfo(MachineFunction &MF) 
+  explicit PPCFunctionInfo(MachineFunction &MF) 
     : FramePointerSaveIndex(0),
       ReturnAddrSaveIndex(0),
       SpillsCR(false),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5d5beebda705..cb315062abf6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -908,6 +908,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
   // to adjust the stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
   if (!DisableRedZone &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 1b042ddef9b8..dea293b502ca 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -41,7 +41,6 @@ namespace llvm {
   bool RealignStack;
   bool DisableJumpTables;
   bool StrongPHIElim;
-  bool DisableRedZone;
   bool AsmVerbosityDefault(false);
 }
 
@@ -86,11 +85,6 @@ GenerateSoftFloatCalls("soft-float",
   cl::location(UseSoftFloat),
   cl::init(false));
 static cl::opt<bool, true>
-GenerateNoImplicitFloats("no-implicit-float",
-  cl::desc("Don't generate implicit floating point instructions (x86-only)"),
-  cl::location(NoImplicitFloat),
-  cl::init(false));
-static cl::opt<bool, true>
 DontPlaceZerosInBSS("nozero-initialized-in-bss",
   cl::desc("Don't place zero-initialized symbols into bss section"),
   cl::location(NoZerosInBSS),
@@ -163,11 +157,6 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
   cl::desc("Use strong PHI elimination."),
   cl::location(StrongPHIElim),
   cl::init(false));
-static cl::opt<bool, true>
-DisableRedZoneOption("disable-red-zone",
-  cl::desc("Do not emit code that uses the red zone."),
-  cl::location(DisableRedZone),
-  cl::init(false));
 
 //---------------------------------------------------------------------------
 // TargetMachine Class
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4c3cc828f46e..260474160cd6 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,5 +14,6 @@
 #include "X86ELFWriterInfo.h"
 using namespace llvm;
 
-X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
+  TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
 X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 06e051a34ac9..acfa50111d2a 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
 
   class X86ELFWriterInfo : public TargetELFWriterInfo {
   public:
-    X86ELFWriterInfo();
+    X86ELFWriterInfo(bool is64Bit);
     virtual ~X86ELFWriterInfo();
   };
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1f507c3e0fd7..ef60ff5e0927 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
 
-  if (!UseSoftFloat && !NoImplicitFloat) {
+  if (!UseSoftFloat) {
     // SSE has no i16 to fp conversion, only i32
     if (X86ScalarSSEf32) {
       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
@@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
   }
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
 
+    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
+    if (!DisableMMX && Subtarget->hasMMX()) {
+      setOperationAction(ISD::FP_TO_SINT,         MVT::v2i32, Custom);
+      setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
+    }
   }
 
   if (Subtarget->hasSSE41()) {
@@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// determining it.
 MVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr) const {
+                                       bool isSrcConst, bool isSrcStr,
+                                       SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962.  This should be removed when PR2962 is fixed.
-  if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
     if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
       return MVT::v4i32;
     if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
@@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                        TotalNumXMMRegs);
 
+      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
       assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
              "SSE register cannot be used when SSE is disabled!");
-      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
              "SSE register cannot be used when SSE is disabled!");
-      if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
         // Kernel mode asks for SSE to be disabled, so don't push them
         // on the stack.
         TotalNumXMMRegs = 0;
@@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
 static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
-  int NumElts = VT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4)
+  if (VT.getVectorElementType().getSizeInBits() < 32)
     return false;
+
+  int NumElts = VT.getVectorNumElements();
   
   if (!isUndefOrEqual(Mask[0], NumElts))
     return false;
@@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Special case for single non-zero, non-undef, element.
-  if (NumNonZero == 1 && NumElems <= 4) {
+  if (NumNonZero == 1) {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDValue Item = Op.getOperand(Idx);
 
@@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     // If we have a constant or non-constant insertion into the low element of
     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
-    // depending on what the source datatype is.  Because we can only get here
-    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
-    if (Idx == 0 &&
-        // Don't do this for i64 values on x86-32.
-        (EVT != MVT::i64 || Subtarget->is64Bit())) {
-      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
-      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
-                                         Subtarget->hasSSE2(), DAG);
+    // depending on what the source datatype is.
+    if (Idx == 0) {
+      if (NumZero == 0) {
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+      } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+          (EVT == MVT::i64 && Subtarget->is64Bit())) {
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+                                           DAG);
+      } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+        MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+                                           Subtarget->hasSSE2(), DAG);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+      }
     }
 
     // Is it a vector logical left shift?
@@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   SDValue N1 = Op.getOperand(1);
   SDValue N2 = Op.getOperand(2);
 
-  if (EVT.getSizeInBits() == 16) {
+  if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
     // Transform it so it match pinsrw which expects a 16-bit value in a GR32
     // as its second argument.
     if (N1.getValueType() != MVT::i32)
@@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getValueType();
+
+  if (SrcVT.isVector()) {
+    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
          "Unknown SINT_TO_FP to lower!");
 
@@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
 }
 
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  if (Op.getValueType().isVector()) {
+    if (Op.getValueType() == MVT::v2i32 &&
+        Op.getOperand(0).getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
   SDValue FIST = Vals.first, StackSlot = Vals.second;
   // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
@@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
     if (Elt.getOpcode() == ISD::UNDEF)
       continue;
 
-    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
-                               EVT.getSizeInBits()/8, i, MFI))
+    LoadSDNode *LD = cast<LoadSDNode>(Elt);
+    LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+    if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
       return false;
   }
   return true;
@@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
 
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
-  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
-    // We are looking for load i64 and zero extend. We want to transform
-    // it before legalizer has a chance to expand it. Also look for i64
-    // BUILD_PAIR bit casted to f64.
-    return SDValue();
-  // This must be an insertion into a zero vector.
-  SDValue HighElt = N->getOperand(1);
-  if (!isZeroNode(HighElt))
-    return SDValue();
+  
+  // Before or during type legalization, we want to try and convert a
+  // build_vector of an i64 load and a zero value into vzext_movl before the 
+  // legalizer can break it up.  
+  // FIXME: does the case below remove the need to do this?
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+    if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+      return SDValue();
+    
+    // This must be an insertion into a zero vector.
+    SDValue HighElt = N->getOperand(1);
+    if (!isZeroNode(HighElt))
+      return SDValue();
+    
+    // Value must be a load.
+    SDNode *Base = N->getOperand(0).getNode();
+    if (!isa<LoadSDNode>(Base)) {
+      if (Base->getOpcode() != ISD::BIT_CONVERT)
+        return SDValue();
+      Base = Base->getOperand(0).getNode();
+      if (!isa<LoadSDNode>(Base))
+        return SDValue();
+    }
+    
+    // Transform it into VZEXT_LOAD addr.
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
+    
+    // Load must not be an extload.
+    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    
+    // Load type should legal type so we don't have to legalize it.
+    if (!TLI.isTypeLegal(VT))
+      return SDValue();
+    
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return ResNode;
+  }
+
+  // The type legalizer will have broken apart v2i64 build_vector created during
+  // widening before the code which handles that case is run.  Look for build
+  // vector (load, load + 4, 0/undef, 0/undef)
+  if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+    LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+    if (!LD0 || !LD1)
+      return SDValue();
+    if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+        LD1->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    // Make sure the second elt is a consecutive load.
+    if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+                               DAG.getMachineFunction().getFrameInfo()))
+      return SDValue();
 
-  // Value must be a load.
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
       return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
+    if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
       return SDValue();
+    
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
   }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
 }
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   if (VT.getSizeInBits() != 64)
     return SDValue();
 
-  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps 
+    && Subtarget->hasSSE2();
   if ((VT.isVector() ||
        (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 550f8bdf9b64..fb4eb6815b21 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -378,7 +378,8 @@ namespace llvm {
     /// determining it.
     virtual
     MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                            bool isSrcConst, bool isSrcStr) const;
+                            bool isSrcConst, bool isSrcStr,
+                            SelectionDAG &DAG) const;
     
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2cd3733f0fb3..8a9b7c917b4b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction &MF = *MBB.getParent();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+  unsigned CalleeFrameSize = 0;
   
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    BuildMI(MBB, MI, DL, get(Opc))
-      .addReg(Reg, RegState::Kill);
+    if (RegClass != &X86::VR128RegClass) {
+      CalleeFrameSize += SlotSize;
+      BuildMI(MBB, MI, DL, get(Opc))
+        .addReg(Reg, RegState::Kill);
+    } else {
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+    }
   }
+
+  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
   return true;
 }
 
@@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
-    BuildMI(MBB, MI, DL, get(Opc), Reg);
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass != &X86::VR128RegClass) {
+      BuildMI(MBB, MI, DL, get(Opc), Reg);
+    } else {
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+    }
   }
   return true;
 }
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f287e17dc3f..43fadc219bb7 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
 def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
 
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
-           (MMX_MOVZDI2PDIrr GR32:$src)>; 
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
-           (MMX_MOVZDI2PDIrr GR32:$src)>; 
-}
-
 let AddedComplexity = 20 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>; 
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>; 
   def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
             (MMX_MOVZDI2PDIrm addr:$src)>; 
 }
 
 // Clear top half.
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
-  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
   def : Pat<(v2i32 (X86vzmovl VR64:$src)),
             (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
 }
 
-// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
-// 8 or 16-bits matter.
-def : Pat<(bc_v8i8  (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-
 // Patterns to perform canonical versions of vector shuffling.
 let AddedComplexity = 10 in {
   def : Pat<(v8i8  (mmx_unpckl_undef VR64:$src, (undef))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1fafa46fa2d8..b44c7a693ef7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
 }
 
 defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
 
 // Common patterns involving scalar load
 def : Pat<(int_x86_sse41_pmovsxbq
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 8a5ac2c9a85c..fafcf7e3d79a 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -73,14 +73,15 @@ public:
                              SRetReturnReg(0),
                              GlobalBaseReg(0) {}
   
-  X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
-                                                CalleeSavedFrameSize(0),
-                                                BytesToPopOnReturn(0),
-                                                DecorationStyle(None),
-                                                ReturnAddrIndex(0),
-                                                TailCallReturnAddrDelta(0),
-                                                SRetReturnReg(0),
-                                                GlobalBaseReg(0) {}
+  explicit X86MachineFunctionInfo(MachineFunction &MF)
+    : ForceFramePointer(false),
+      CalleeSavedFrameSize(0),
+      BytesToPopOnReturn(0),
+      DecorationStyle(None),
+      ReturnAddrIndex(0),
+      TailCallReturnAddrDelta(0),
+      SRetReturnReg(0),
+      GlobalBaseReg(0) {}
   
   bool getForceFramePointer() const { return ForceFramePointer;} 
   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5af1fb17818d..c733f26a8c8f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   // function, and use up to 128 bytes of stack space, don't have a frame
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
   if (Is64Bit && !DisableRedZone &&
       !needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->hasCalls()) {                          // No calls.
+      !MFI->hasCalls() &&                          // No calls.
+      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (hasFP(MF)) MinSize += SlotSize;
     StackSize = std::max(MinSize,
@@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
-  unsigned ReadyLabelId = 0;
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer is ready.
-    ReadyLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
-  }
-
   // Skip the callee-saved push instructions.
   while (MBBI != MBB.end() &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
 
-  if (NumBytes) {   // adjust stack pointer: ESP -= numbytes
+  if (NumBytes) {   // Adjust stack pointer: ESP -= numbytes.
     if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
-      // Check, whether EAX is livein for this function
+      // Check, whether EAX is livein for this function.
       bool isEAXAlive = false;
       for (MachineRegisterInfo::livein_iterator
            II = MF.getRegInfo().livein_begin(),
            EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
         unsigned Reg = II->first;
         isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                      Reg == X86::AH || Reg == X86::AL);
+                      Reg == X86::AH  || Reg == X86::AL);
       }
 
-      // Function prologue calls _alloca to probe the stack when allocating
-      // more than 4k bytes in one go. Touching the stack at 4K increments is
+      // Function prologue calls _alloca to probe the stack when allocating more
+      // than 4k bytes in one go. Touching the stack at 4K increments is
       // necessary to ensure that the guard pages used by the OS virtual memory
       // manager are allocated in correct sequence.
       if (!isEAXAlive) {
@@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
         // Save EAX
         BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
           .addReg(X86::EAX, RegState::Kill);
+
         // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
         // allocated bytes for EAX.
-        BuildMI(MBB, MBBI, DL, 
-                TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+          .addImm(NumBytes-4);
         BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
           .addExternalSymbol("_alloca");
+
         // Restore EAX
         MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                                 X86::EAX),
@@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
       // merge the two. This can be the case when tail call elimination is
       // enabled and the callee has more arguments then the caller.
       NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
       // If there is an ADD32ri or SUB32ri of ESP immediately after this
       // instruction, merge the two instructions.
       mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
@@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
   }
 
-  if (needsFrameMoves)
+  if (needsFrameMoves) {
+    // Mark effective beginning of when frame pointer is ready.
+    unsigned ReadyLabelId = 0;
+    ReadyLabelId = MMI->NextLabelID();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
     emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+  }
 }
 
 void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8264462506aa..88ab247b0d55 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this),
+    ELFWriterInfo(Subtarget.is64Bit()) {
   DefRelocModel = getRelocationModel();
   // FIXME: Correctly select PIC model for Win64 stuff
   if (getRelocationModel() == Reloc::Default) {
@@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel,
                                           bool Verbose,
                                           raw_ostream &Out) {
+  // FIXME: Move this somewhere else!
+  // On Darwin, override 64-bit static relocation to pic_ since the
+  // assembler doesn't support it.
+  if (DefRelocModel == Reloc::Static &&
+      Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+    setRelocationModel(Reloc::PIC_);
+
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
     PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 43adb0f917c9..124a011f3cb8 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -40,7 +40,7 @@ public:
     FPSpillSlot(0),
     VarArgsFrameIndex(0) {}
   
-  XCoreFunctionInfo(MachineFunction &MF) :
+  explicit XCoreFunctionInfo(MachineFunction &MF) :
     UsesLR(false),
     LRSpillSlot(0),
     FPSpillSlot(0),
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 733dfa97a154..673d38b7f3ae 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -59,7 +59,8 @@ cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
 /// two values.
 namespace {
   struct VISIBILITY_HIDDEN Expression {
-    enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 
+    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+                            UDIV, SDIV, FDIV, UREM, SREM,
                             FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 
                             ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 
                             ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 
@@ -200,8 +201,11 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
   default: // THIS SHOULD NEVER HAPPEN
     assert(0 && "Binary operator with unknown opcode?");
   case Instruction::Add:  return Expression::ADD;
+  case Instruction::FAdd: return Expression::FADD;
   case Instruction::Sub:  return Expression::SUB;
+  case Instruction::FSub: return Expression::FSUB;
   case Instruction::Mul:  return Expression::MUL;
+  case Instruction::FMul: return Expression::FMUL;
   case Instruction::UDiv: return Expression::UDIV;
   case Instruction::SDiv: return Expression::SDIV;
   case Instruction::FDiv: return Expression::FDIV;
diff --git a/lib/Transforms/Scalar/GVNPRE.cpp b/lib/Transforms/Scalar/GVNPRE.cpp
index e3b09379a22d..0f3153f2a7b1 100644
--- a/lib/Transforms/Scalar/GVNPRE.cpp
+++ b/lib/Transforms/Scalar/GVNPRE.cpp
@@ -55,7 +55,8 @@ namespace {
 /// two values.
 
 struct Expression {
-  enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 
+  enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+                          UDIV, SDIV, FDIV, UREM, SREM,
                           FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 
                           ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 
                           ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 
@@ -202,10 +203,16 @@ Expression::ExpressionOpcode
   switch(BO->getOpcode()) {
     case Instruction::Add:
       return Expression::ADD;
+    case Instruction::FAdd:
+      return Expression::FADD;
     case Instruction::Sub:
       return Expression::SUB;
+    case Instruction::FSub:
+      return Expression::FSUB;
     case Instruction::Mul:
       return Expression::MUL;
+    case Instruction::FMul:
+      return Expression::FMUL;
     case Instruction::UDiv:
       return Expression::UDIV;
     case Instruction::SDiv:
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index af61eae16f80..83503fdea24b 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -754,7 +754,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   BinaryOperator *Incr =
     dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
   if (!Incr) return;
-  if (Incr->getOpcode() != Instruction::Add) return;
+  if (Incr->getOpcode() != Instruction::FAdd) return;
   ConstantFP *IncrValue = NULL;
   unsigned IncrVIndex = 1;
   if (Incr->getOperand(1) == PH)
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index e6f854f1a56b..97bd34c771c2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -167,8 +167,11 @@ namespace {
     //   otherwise    - Change was made, replace I with returned instruction
     //
     Instruction *visitAdd(BinaryOperator &I);
+    Instruction *visitFAdd(BinaryOperator &I);
     Instruction *visitSub(BinaryOperator &I);
+    Instruction *visitFSub(BinaryOperator &I);
     Instruction *visitMul(BinaryOperator &I);
+    Instruction *visitFMul(BinaryOperator &I);
     Instruction *visitURem(BinaryOperator &I);
     Instruction *visitSRem(BinaryOperator &I);
     Instruction *visitFRem(BinaryOperator &I);
@@ -403,7 +406,8 @@ X("instcombine", "Combine redundant instructions");
 //   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static unsigned getComplexity(Value *V) {
   if (isa<Instruction>(V)) {
-    if (BinaryOperator::isNeg(V) || BinaryOperator::isNot(V))
+    if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
+        BinaryOperator::isNot(V))
       return 3;
     return 4;
   }
@@ -576,6 +580,25 @@ static inline Value *dyn_castNegVal(Value *V) {
   return 0;
 }
 
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+static inline Value *dyn_castFNegVal(Value *V) {
+  if (BinaryOperator::isFNeg(V))
+    return BinaryOperator::getFNegArgument(V);
+
+  // Constants can be considered to be negated values if they can be folded.
+  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+    return ConstantExpr::getFNeg(C);
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+    if (C->getType()->getElementType()->isFloatingPoint())
+      return ConstantExpr::getFNeg(C);
+
+  return 0;
+}
+
 static inline Value *dyn_castNotVal(Value *V) {
   if (BinaryOperator::isNot(V))
     return BinaryOperator::getNotArgument(V);
@@ -1733,12 +1756,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
           default: assert(0 && "Case stmts out of sync!");
           case Intrinsic::x86_sse_sub_ss:
           case Intrinsic::x86_sse2_sub_sd:
-            TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
                                                         II->getName()), *II);
             break;
           case Intrinsic::x86_sse_mul_ss:
           case Intrinsic::x86_sse2_mul_sd:
-            TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
                                                          II->getName()), *II);
             break;
           }
@@ -2052,14 +2075,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       return ReplaceInstUsesWith(I, RHS);
 
     // X + 0 --> X
-    if (!I.getType()->isFPOrFPVector()) { // NOTE: -0 + +0 = +0.
-      if (RHSC->isNullValue())
-        return ReplaceInstUsesWith(I, LHS);
-    } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
-                              (I.getType())->getValueAPF()))
-        return ReplaceInstUsesWith(I, LHS);
-    }
+    if (RHSC->isNullValue())
+      return ReplaceInstUsesWith(I, LHS);
 
     if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
       // X + (signbit) --> X ^ signbit
@@ -2317,11 +2334,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         return SelectInst::Create(SI->getCondition(), A, N);
     }
   }
-  
-  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
-    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
-      return ReplaceInstUsesWith(I, LHS);
 
   // Check for (add (sext x), y), see if we can merge this into an
   // integer add followed by a sext.
@@ -2359,7 +2371,42 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       }
     }
   }
-  
+
+  return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+  bool Changed = SimplifyCommutative(I);
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+    // X + 0 --> X
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+                              (I.getType())->getValueAPF()))
+        return ReplaceInstUsesWith(I, LHS);
+    }
+
+    if (isa<PHINode>(LHS))
+      if (Instruction *NV = FoldOpIntoPhi(I))
+        return NV;
+  }
+
+  // -A + B  -->  B - A
+  // -A + -B  -->  -(A + B)
+  if (Value *LHSV = dyn_castFNegVal(LHS))
+    return BinaryOperator::CreateFSub(RHS, LHSV);
+
+  // A + -B  -->  A - B
+  if (!isa<Constant>(RHS))
+    if (Value *V = dyn_castFNegVal(RHS))
+      return BinaryOperator::CreateFSub(LHS, V);
+
+  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+      return ReplaceInstUsesWith(I, LHS);
+
   // Check for (add double (sitofp x), y), see if we can merge this into an
   // integer add followed by a promotion.
   if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
@@ -2407,8 +2454,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Op0 == Op1 &&                        // sub X, X  -> 0
-      !I.getType()->isFPOrFPVector())
+  if (Op0 == Op1)                        // sub X, X  -> 0
     return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // If this is a 'B = x-(-A)', change to B = x+A...
@@ -2469,8 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     return BinaryOperator::CreateXor(Op0, Op1);
 
   if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
-    if (Op1I->getOpcode() == Instruction::Add &&
-        !Op0->getType()->isFPOrFPVector()) {
+    if (Op1I->getOpcode() == Instruction::Add) {
       if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
         return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
@@ -2487,8 +2532,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
       // is not used by anyone else...
       //
-      if (Op1I->getOpcode() == Instruction::Sub &&
-          !Op1I->getType()->isFPOrFPVector()) {
+      if (Op1I->getOpcode() == Instruction::Sub) {
         // Swap the two operands of the subexpr...
         Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
         Op1I->setOperand(0, IIOp1);
@@ -2526,18 +2570,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     }
   }
 
-  if (!Op0->getType()->isFPOrFPVector())
-    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
-      if (Op0I->getOpcode() == Instruction::Add) {
-        if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
-          return ReplaceInstUsesWith(I, Op0I->getOperand(1));
-        else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
-          return ReplaceInstUsesWith(I, Op0I->getOperand(0));
-      } else if (Op0I->getOpcode() == Instruction::Sub) {
-        if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
-          return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
-      }
+  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+    if (Op0I->getOpcode() == Instruction::Add) {
+      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
+        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
+        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+    } else if (Op0I->getOpcode() == Instruction::Sub) {
+      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
+        return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
     }
+  }
 
   ConstantInt *C1;
   if (Value *X = dyn_castFoldableMul(Op0, C1)) {
@@ -2551,6 +2594,40 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   return 0;
 }
 
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // If this is a 'B = x-(-A)', change to B = x+A...
+  if (Value *V = dyn_castFNegVal(Op1))
+    return BinaryOperator::CreateFAdd(Op0, V);
+
+  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+    if (Op1I->getOpcode() == Instruction::FAdd) {
+      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
+        return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName());
+      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
+        return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
+    }
+
+    if (Op1I->hasOneUse()) {
+      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
+      // is not used by anyone else...
+      //
+      if (Op1I->getOpcode() == Instruction::FSub) {
+        // Swap the two operands of the subexpr...
+        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
+        Op1I->setOperand(0, IIOp1);
+        Op1I->setOperand(1, IIOp0);
+
+        // Create the new top level fadd instruction...
+        return BinaryOperator::CreateFAdd(Op0, Op1);
+      }
+    }
+  }
+
+  return 0;
+}
+
 /// isSignBitCheck - Given an exploded icmp instruction, return true if the
 /// comparison only checks the sign bit.  If it only checks the sign bit, set
 /// TrueIfSigned if the result of the comparison is true when the input value is
@@ -2585,7 +2662,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
   Value *Op0 = I.getOperand(0);
 
-  if (isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0
+  // TODO: If Op1 is undef and Op0 is finite, return zero.
+  if (!I.getType()->isFPOrFPVector() &&
+      isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0
     return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // Simplify mul instructions with a constant RHS...
@@ -2611,17 +2690,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         return BinaryOperator::CreateShl(Op0,
                  ConstantInt::get(Op0->getType(), Val.logBase2()));
       }
-    } else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
-      if (Op1F->isNullValue())
-        return ReplaceInstUsesWith(I, Op1);
-
-      // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
-      // ANSI says we can drop signals, so we can do this anyway." (from GCC)
-      if (Op1F->isExactlyValue(1.0))
-        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
     } else if (isa<VectorType>(Op1->getType())) {
-      if (isa<ConstantAggregateZero>(Op1))
-        return ReplaceInstUsesWith(I, Op1);
+      // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros.
 
       if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
         if (Op1V->isAllOnesValue())              // X * -1 == 0 - X
@@ -2629,9 +2699,6 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
         // As above, vector X*splat(1.0) -> X in all defined cases.
         if (Constant *Splat = Op1V->getSplatValue()) {
-          if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
-            if (F->isExactlyValue(1.0))
-              return ReplaceInstUsesWith(I, Op0);
           if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
             if (CI->equalsInt(1))
               return ReplaceInstUsesWith(I, Op0);
@@ -2755,6 +2822,45 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+  bool Changed = SimplifyCommutative(I);
+  Value *Op0 = I.getOperand(0);
+
+  // Simplify mul instructions with a constant RHS...
+  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
+    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+      // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
+      // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+      if (Op1F->isExactlyValue(1.0))
+        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
+    } else if (isa<VectorType>(Op1->getType())) {
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+        // As above, vector X*splat(1.0) -> X in all defined cases.
+        if (Constant *Splat = Op1V->getSplatValue()) {
+          if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+            if (F->isExactlyValue(1.0))
+              return ReplaceInstUsesWith(I, Op0);
+        }
+      }
+    }
+
+    // Try to fold constant mul into select arguments.
+    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+        return R;
+
+    if (isa<PHINode>(Op0))
+      if (Instruction *NV = FoldOpIntoPhi(I))
+        return NV;
+  }
+
+  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
+    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1)))
+      return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+  return Changed ? &I : 0;
+}
+
 /// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
 /// instruction.
 bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
@@ -8562,17 +8668,17 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   if (Instruction *I = commonCastTransforms(CI))
     return I;
   
-  // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are
+  // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
   // smaller than the destination type, we can eliminate the truncate by doing
-  // the add as the smaller type.  This applies to add/sub/mul/div as well as
+  // the add as the smaller type.  This applies to fadd/fsub/fmul/fdiv as well as
   // many builtins (sqrt, etc).
   BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
   if (OpI && OpI->hasOneUse()) {
     switch (OpI->getOpcode()) {
     default: break;
-    case Instruction::Add:
-    case Instruction::Sub:
-    case Instruction::Mul:
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
     case Instruction::FDiv:
     case Instruction::FRem:
       const Type *SrcTy = OpI->getType();
@@ -9322,11 +9428,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
 
         // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))).  This is
         // even legal for FP.
-        if (TI->getOpcode() == Instruction::Sub &&
-            FI->getOpcode() == Instruction::Add) {
+        if ((TI->getOpcode() == Instruction::Sub &&
+             FI->getOpcode() == Instruction::Add) ||
+            (TI->getOpcode() == Instruction::FSub &&
+             FI->getOpcode() == Instruction::FAdd)) {
           AddOp = FI; SubOp = TI;
-        } else if (FI->getOpcode() == Instruction::Sub &&
-                   TI->getOpcode() == Instruction::Add) {
+        } else if ((FI->getOpcode() == Instruction::Sub &&
+                    TI->getOpcode() == Instruction::Add) ||
+                   (FI->getOpcode() == Instruction::FSub &&
+                    TI->getOpcode() == Instruction::FAdd)) {
           AddOp = TI; SubOp = FI;
         }
 
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 92270b5b6473..944f40931910 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2268,7 +2268,8 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
       /* create new increment. '++d' in above example. */
       ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
       BinaryOperator *NewIncr = 
-        BinaryOperator::Create(Incr->getOpcode(),
+        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+                                 Instruction::FAdd : Instruction::FSub,
                                NewPH, CFP, "IV.S.next.", Incr);
 
       NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
@@ -2424,24 +2425,14 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
 
   // Get the terminating condition for the loop if possible (this isn't
   // necessarily in the latch, or a block that's a predecessor of the header).
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1) return;
+  if (!L->getExitBlock())
+    return; // More than one loop exit blocks.
 
   // Okay, there is one exit block.  Try to find the condition that causes the
   // loop to be exited.
-  BasicBlock *ExitBlock = ExitBlocks[0];
-
-  BasicBlock *ExitingBlock = 0;
-  for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
-       PI != E; ++PI)
-    if (L->contains(*PI)) {
-      if (ExitingBlock == 0)
-        ExitingBlock = *PI;
-      else
-        return; // More than one block exiting!
-    }
-  assert(ExitingBlock && "No exits from loop, something is broken!");
+  BasicBlock *ExitingBlock = L->getExitingBlock();
+  if (!ExitingBlock)
+    return; // More than one block exiting!
 
   // Okay, we've computed the exiting block.  See what condition causes us to
   // exit.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 4b0064090cf3..59989c92d740 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1009,7 +1009,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
     if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x
       return Op1;
     if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x
-      return B.CreateMul(Op1, Op1, "pow2");
+      return B.CreateFMul(Op1, Op1, "pow2");
     if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
       return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
     return 0;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2cde765560b8..bcc6b819013b 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -419,9 +419,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
       case Instruction::LShr:
       case Instruction::AShr:
       case Instruction::ICmp:
-      case Instruction::FCmp:
-        if (I->getOperand(0)->getType()->isFPOrFPVector())
-          return false;  // FP arithmetic might trap.
         break;   // These are all cheap and non-trapping instructions.
       }
 
@@ -1012,9 +1009,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
   default: return false;  // Not safe / profitable to hoist.
   case Instruction::Add:
   case Instruction::Sub:
-    // FP arithmetic might trap. Not worth doing for vector ops.
-    if (HInst->getType()->isFloatingPoint() 
-        || isa<VectorType>(HInst->getType()))
+    // Not worth doing for vector ops.
+    if (isa<VectorType>(HInst->getType()))
       return false;
     break;
   case Instruction::And:
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 5a8fad9cd326..8dfbd1d50216 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -59,6 +59,10 @@ std::string Attribute::getAsString(Attributes Attrs) {
     Result += "ssp ";
   if (Attrs & Attribute::StackProtectReq)
     Result += "sspreq ";
+  if (Attrs & Attribute::NoRedZone)
+    Result += "noredzone ";
+  if (Attrs & Attribute::NoImplicitFloat)
+    Result += "noimplicitfloat ";
   if (Attrs & Attribute::Alignment) {
     Result += "align ";
     Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 7e4902fd5635..1d293ccbd44d 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -602,10 +602,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
       return Constant::getNullValue(C1->getType());
     case Instruction::UDiv:
     case Instruction::SDiv:
-    case Instruction::FDiv:
     case Instruction::URem:
     case Instruction::SRem:
-    case Instruction::FRem:
       if (!isa<UndefValue>(C2))                    // undef / X -> 0
         return Constant::getNullValue(C1->getType());
       return const_cast<Constant*>(C2);            // X / undef -> undef
@@ -783,13 +781,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
       switch (Opcode) {
       default:                   
         break;
-      case Instruction::Add:
+      case Instruction::FAdd:
         (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
         return ConstantFP::get(C3V);
-      case Instruction::Sub:     
+      case Instruction::FSub:
         (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
         return ConstantFP::get(C3V);
-      case Instruction::Mul:
+      case Instruction::FMul:
         (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
         return ConstantFP::get(C3V);
       case Instruction::FDiv:
@@ -808,12 +806,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
       switch (Opcode) {
       default:
         break;
-      case Instruction::Add: 
+      case Instruction::Add:
         return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAdd);
-      case Instruction::Sub: 
+      case Instruction::FAdd:
+        return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFAdd);
+      case Instruction::Sub:
         return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSub);
-      case Instruction::Mul: 
+      case Instruction::FSub:
+        return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFSub);
+      case Instruction::Mul:
         return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getMul);
+      case Instruction::FMul:
+        return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFMul);
       case Instruction::UDiv:
         return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getUDiv);
       case Instruction::SDiv:
@@ -851,7 +855,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
     // other way if possible.
     switch (Opcode) {
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor:
@@ -862,6 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
     case Instruction::LShr:
     case Instruction::AShr:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::SDiv:
     case Instruction::UDiv:
     case Instruction::FDiv:
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 97f3ac9c270a..69c503dff956 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -775,26 +775,46 @@ const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const {
 /// specify the full Instruction::OPCODE identifier.
 ///
 Constant *ConstantExpr::getNeg(Constant *C) {
+  // API compatibility: Adjust integer opcodes to floating-point opcodes.
+  if (C->getType()->isFPOrFPVector())
+    return getFNeg(C);
+  assert(C->getType()->isIntOrIntVector() &&
+         "Cannot NEG a nonintegral value!");
   return get(Instruction::Sub,
              ConstantExpr::getZeroValueForNegationExpr(C->getType()),
              C);
 }
+Constant *ConstantExpr::getFNeg(Constant *C) {
+  assert(C->getType()->isFPOrFPVector() &&
+         "Cannot FNEG a non-floating-point value!");
+  return get(Instruction::FSub,
+             ConstantExpr::getZeroValueForNegationExpr(C->getType()),
+             C);
+}
 Constant *ConstantExpr::getNot(Constant *C) {
-  assert((isa<IntegerType>(C->getType()) ||
-            cast<VectorType>(C->getType())->getElementType()->isInteger()) &&
-          "Cannot NOT a nonintegral value!");
+  assert(C->getType()->isIntOrIntVector() &&
+         "Cannot NOT a nonintegral value!");
   return get(Instruction::Xor, C,
              Constant::getAllOnesValue(C->getType()));
 }
 Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) {
   return get(Instruction::Add, C1, C2);
 }
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+  return get(Instruction::FAdd, C1, C2);
+}
 Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) {
   return get(Instruction::Sub, C1, C2);
 }
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+  return get(Instruction::FSub, C1, C2);
+}
 Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) {
   return get(Instruction::Mul, C1, C2);
 }
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+  return get(Instruction::FMul, C1, C2);
+}
 Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) {
   return get(Instruction::UDiv, C1, C2);
 }
@@ -2142,15 +2162,28 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate,
 }
 
 Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
+  // API compatibility: Adjust integer opcodes to floating-point opcodes.
+  if (C1->getType()->isFPOrFPVector()) {
+    if (Opcode == Instruction::Add) Opcode = Instruction::FAdd;
+    else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub;
+    else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul;
+  }
 #ifndef NDEBUG
   switch (Opcode) {
-  case Instruction::Add: 
+  case Instruction::Add:
   case Instruction::Sub:
-  case Instruction::Mul: 
+  case Instruction::Mul:
     assert(C1->getType() == C2->getType() && "Op types should be identical!");
-    assert((C1->getType()->isInteger() || C1->getType()->isFloatingPoint() ||
-            isa<VectorType>(C1->getType())) &&
-           "Tried to create an arithmetic operation on a non-arithmetic type!");
+    assert(C1->getType()->isIntOrIntVector() &&
+           "Tried to create an integer operation on a non-integer type!");
+    break;
+  case Instruction::FAdd:
+  case Instruction::FSub:
+  case Instruction::FMul:
+    assert(C1->getType() == C2->getType() && "Op types should be identical!");
+    assert(C1->getType()->isFPOrFPVector() &&
+           "Tried to create a floating-point operation on a "
+           "non-floating-point type!");
     break;
   case Instruction::UDiv: 
   case Instruction::SDiv: 
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 9e030b78e9e5..7556b8ebe760 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -101,8 +101,11 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
 
   // Standard binary operators...
   case Add: return "add";
+  case FAdd: return "fadd";
   case Sub: return "sub";
+  case FSub: return "fsub";
   case Mul: return "mul";
+  case FMul: return "fmul";
   case UDiv: return "udiv";
   case SDiv: return "sdiv";
   case FDiv: return "fdiv";
@@ -330,19 +333,13 @@ bool Instruction::mayThrow() const {
 
 /// isAssociative - Return true if the instruction is associative:
 ///
-///   Associative operators satisfy:  x op (y op z) === (x op y) op z)
+///   Associative operators satisfy:  x op (y op z) === (x op y) op z
 ///
-/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative, when not
-/// applied to floating point types.
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
 ///
 bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
-  if (Opcode == And || Opcode == Or || Opcode == Xor)
-    return true;
-
-  // Add/Mul reassociate unless they are FP or FP vectors.
-  if (Opcode == Add || Opcode == Mul)
-    return !Ty->isFPOrFPVector();
-  return 0;
+  return Opcode == And || Opcode == Or || Opcode == Xor ||
+         Opcode == Add || Opcode == Mul;
 }
 
 /// isCommutative - Return true if the instruction is commutative:
@@ -355,7 +352,9 @@ bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
 bool Instruction::isCommutative(unsigned op) {
   switch (op) {
   case Add:
+  case FAdd:
   case Mul:
+  case FMul:
   case And:
   case Or:
   case Xor:
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index fe30271f8445..4c228fe81c0a 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1502,29 +1502,43 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
 //                             BinaryOperator Class
 //===----------------------------------------------------------------------===//
 
+/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the
+/// type is floating-point, to help provide compatibility with an older API.
+///
+static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType,
+                                             const Type *Ty) {
+  // API compatibility: Adjust integer opcodes to floating-point opcodes.
+  if (Ty->isFPOrFPVector()) {
+    if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd;
+    else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub;
+    else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul;
+  }
+  return iType;
+}
+
 BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
                                const Type *Ty, const std::string &Name,
                                Instruction *InsertBefore)
-  : Instruction(Ty, iType,
+  : Instruction(Ty, AdjustIType(iType, Ty),
                 OperandTraits<BinaryOperator>::op_begin(this),
                 OperandTraits<BinaryOperator>::operands(this),
                 InsertBefore) {
   Op<0>() = S1;
   Op<1>() = S2;
-  init(iType);
+  init(AdjustIType(iType, Ty));
   setName(Name);
 }
 
 BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, 
                                const Type *Ty, const std::string &Name,
                                BasicBlock *InsertAtEnd)
-  : Instruction(Ty, iType,
+  : Instruction(Ty, AdjustIType(iType, Ty),
                 OperandTraits<BinaryOperator>::op_begin(this),
                 OperandTraits<BinaryOperator>::operands(this),
                 InsertAtEnd) {
   Op<0>() = S1;
   Op<1>() = S2;
-  init(iType);
+  init(AdjustIType(iType, Ty));
   setName(Name);
 }
 
@@ -1537,12 +1551,19 @@ void BinaryOperator::init(BinaryOps iType) {
 #ifndef NDEBUG
   switch (iType) {
   case Add: case Sub:
-  case Mul: 
+  case Mul:
+    assert(getType() == LHS->getType() &&
+           "Arithmetic operation should return same type as operands!");
+    assert(getType()->isIntOrIntVector() &&
+           "Tried to create an integer operation on a non-integer type!");
+    break;
+  case FAdd: case FSub:
+  case FMul:
     assert(getType() == LHS->getType() &&
            "Arithmetic operation should return same type as operands!");
-    assert((getType()->isInteger() || getType()->isFloatingPoint() ||
-            isa<VectorType>(getType())) &&
-          "Tried to create an arithmetic operation on a non-arithmetic type!");
+    assert(getType()->isFPOrFPVector() &&
+           "Tried to create a floating-point operation on a "
+           "non-floating-point type!");
     break;
   case UDiv: 
   case SDiv: 
@@ -1631,6 +1652,22 @@ BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
                             Op->getType(), Name, InsertAtEnd);
 }
 
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+                                           Instruction *InsertBefore) {
+  Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+  return new BinaryOperator(Instruction::FSub,
+                            zero, Op,
+                            Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name,
+                                           BasicBlock *InsertAtEnd) {
+  Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+  return new BinaryOperator(Instruction::FSub,
+                            zero, Op,
+                            Op->getType(), Name, InsertAtEnd);
+}
+
 BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
                                           Instruction *InsertBefore) {
   Constant *C;
@@ -1679,6 +1716,14 @@ bool BinaryOperator::isNeg(const Value *V) {
   return false;
 }
 
+bool BinaryOperator::isFNeg(const Value *V) {
+  if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+    if (Bop->getOpcode() == Instruction::FSub)
+      return Bop->getOperand(0) ==
+             ConstantExpr::getZeroValueForNegationExpr(Bop->getType());
+  return false;
+}
+
 bool BinaryOperator::isNot(const Value *V) {
   if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
     return (Bop->getOpcode() == Instruction::Xor &&
@@ -1696,6 +1741,15 @@ const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
   return getNegArgument(const_cast<Value*>(BinOp));
 }
 
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+  assert(isFNeg(BinOp) && "getFNegArgument from non-'fneg' instruction!");
+  return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+  return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
 Value *BinaryOperator::getNotArgument(Value *BinOp) {
   assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
   BinaryOperator *BO = cast<BinaryOperator>(BinOp);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 59ec3bee715a..b047d0c9fb6d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1069,13 +1069,40 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
           "Both operands to a binary operator are not of the same type!", &B);
 
   switch (B.getOpcode()) {
+  // Check that integer arithmetic operators are only used with
+  // integral operands.
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    Assert1(B.getType()->isIntOrIntVector(),
+            "Integer arithmetic operators only work with integral types!", &B);
+    Assert1(B.getType() == B.getOperand(0)->getType(),
+            "Integer arithmetic operators must have same type "
+            "for operands and result!", &B);
+    break;
+  // Check that floating-point arithmetic operators are only used with
+  // floating-point operands.
+  case Instruction::FAdd:
+  case Instruction::FSub:
+  case Instruction::FMul:
+  case Instruction::FDiv:
+  case Instruction::FRem:
+    Assert1(B.getType()->isFPOrFPVector(),
+            "Floating-point arithmetic operators only work with "
+            "floating-point types!", &B);
+    Assert1(B.getType() == B.getOperand(0)->getType(),
+            "Floating-point arithmetic operators must have same type "
+            "for operands and result!", &B);
+    break;
   // Check that logical operators are only used with integral operands.
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
-    Assert1(B.getType()->isInteger() ||
-            (isa<VectorType>(B.getType()) && 
-             cast<VectorType>(B.getType())->getElementType()->isInteger()),
+    Assert1(B.getType()->isIntOrIntVector(),
             "Logical operators only work with integral types!", &B);
     Assert1(B.getType() == B.getOperand(0)->getType(),
             "Logical operators must have same type for operands and result!",
@@ -1084,22 +1111,13 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
-    Assert1(B.getType()->isInteger() ||
-            (isa<VectorType>(B.getType()) && 
-             cast<VectorType>(B.getType())->getElementType()->isInteger()),
+    Assert1(B.getType()->isIntOrIntVector(),
             "Shifts only work with integral types!", &B);
     Assert1(B.getType() == B.getOperand(0)->getType(),
             "Shift return type must be same as operands!", &B);
-    /* FALL THROUGH */
-  default:
-    // Arithmetic operators only work on integer or fp values
-    Assert1(B.getType() == B.getOperand(0)->getType(),
-            "Arithmetic operators must have same type for operands and result!",
-            &B);
-    Assert1(B.getType()->isInteger() || B.getType()->isFloatingPoint() ||
-            isa<VectorType>(B.getType()),
-            "Arithmetic operators must have integer, fp, or vector type!", &B);
     break;
+  default:
+    assert(0 && "Unknown BinaryOperator opcode!");
   }
 
   visitInstruction(B);
diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll
index 4b2fcea0df9a..17f2dffdbfcf 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -18,7 +18,7 @@ bb1:		; preds = %bb1, %bb1.thread
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = mul double %4, 3.900000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double* %x, i64 %6		; <double*> [#uses=1]
 	store double %5, double* %7, align 8
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index a9175c31d067..ca6ad0aaba12 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -18,7 +18,7 @@ bb1:		; preds = %bb1, %bb1.thread
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = mul double %4, 3.900000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i7 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double* %x, i64 %6		; <double*> [#uses=1]
 	store double %5, double* %7, align 8
@@ -41,7 +41,7 @@ bb1:		; preds = %bb1, %bb1.thread
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = mul double %4, 3.900000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double* %x, i64 %6		; <double*> [#uses=1]
 	store double %5, double* %7, align 8
@@ -64,7 +64,7 @@ bb1:		; preds = %bb1, %bb1.thread
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = mul double %4, 3.900000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double* %x, i64 %6		; <double*> [#uses=1]
 	store double %5, double* %7, align 8
@@ -87,7 +87,7 @@ bb1:		; preds = %bb1, %bb1.thread
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = mul double %4, 3.900000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double* %x, i64 %6		; <double*> [#uses=1]
 	store double %5, double* %7, align 8
diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll
index a61d5da57ea9..49c4e133b467 100644
--- a/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -13,7 +13,7 @@ loop:		; preds = %loop, %entry
 	%indvar.i8 = ashr i64 %s0, 8		; <i64> [#uses=1]
 	%t0 = getelementptr double* %d, i64 %indvar.i8		; <double*> [#uses=2]
 	%t1 = load double* %t0		; <double> [#uses=1]
-	%t2 = mul double %t1, 1.000000e-01		; <double> [#uses=1]
+	%t2 = fmul double %t1, 1.000000e-01		; <double> [#uses=1]
 	store double %t2, double* %t0
 	%indvar.next = sub i64 %indvar, 1		; <i64> [#uses=2]
 	%exitcond = icmp eq i64 %indvar.next, 10		; <i1> [#uses=1]
diff --git a/test/Assembler/2002-04-07-HexFloatConstants.ll b/test/Assembler/2002-04-07-HexFloatConstants.ll
index b9860b3ac5a1..5c54b39b8081 100644
--- a/test/Assembler/2002-04-07-HexFloatConstants.ll
+++ b/test/Assembler/2002-04-07-HexFloatConstants.ll
@@ -11,6 +11,6 @@
 ; RUN: diff %t.1 %t.2
 
 define double @test() {
-        %tmp = mul double 7.200000e+101, 0x427F4000             ; <double> [#uses=1]
+        %tmp = fmul double 7.200000e+101, 0x427F4000             ; <double> [#uses=1]
         ret double %tmp
 }
diff --git a/test/Assembler/2002-04-07-InfConstant.ll b/test/Assembler/2002-04-07-InfConstant.ll
index 317b8f3654c3..71837c94378c 100644
--- a/test/Assembler/2002-04-07-InfConstant.ll
+++ b/test/Assembler/2002-04-07-InfConstant.ll
@@ -3,7 +3,7 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep 0x7FF0000000000000
 
 define float @test() {
-        %tmp = mul float 0x7FF0000000000000, 1.000000e+01               ; <float> [#uses=1]
+        %tmp = fmul float 0x7FF0000000000000, 1.000000e+01               ; <float> [#uses=1]
         ret float %tmp
 }
 
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3661c4c06d69..6e11b1691018 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -35,8 +35,8 @@ cond_next589:		; preds = %cond_next489
 	%tmp612 = load i32* null		; <i32> [#uses=1]
 	%tmp629 = load i32* null		; <i32> [#uses=1]
 	%tmp629a = sitofp i32 %tmp629 to double		; <double> [#uses=1]
-	%tmp631 = mul double %tmp629a, 0.000000e+00		; <double> [#uses=1]
-	%tmp632 = add double 0.000000e+00, %tmp631		; <double> [#uses=1]
+	%tmp631 = fmul double %tmp629a, 0.000000e+00		; <double> [#uses=1]
+	%tmp632 = fadd double 0.000000e+00, %tmp631		; <double> [#uses=1]
 	%tmp642 = call fastcc i32 @sign( i32 %tmp576, i32 %tmp561 )		; <i32> [#uses=1]
 	%tmp650 = mul i32 %tmp606, %tmp642		; <i32> [#uses=1]
 	%tmp656 = mul i32 %tmp650, %tmp612		; <i32> [#uses=1]
@@ -46,8 +46,8 @@ cond_next589:		; preds = %cond_next489
 	%tmp666 = sub i32 %tmp660, %tmp496		; <i32> [#uses=1]
 	%tmp667 = sitofp i32 %tmp666 to double		; <double> [#uses=2]
 	call void @levrun_linfo_inter( i32 %tmp576, i32 0, i32* null, i32* null )
-	%tmp671 = mul double %tmp667, %tmp667		; <double> [#uses=1]
-	%tmp675 = add double %tmp671, 0.000000e+00		; <double> [#uses=1]
+	%tmp671 = fmul double %tmp667, %tmp667		; <double> [#uses=1]
+	%tmp675 = fadd double %tmp671, 0.000000e+00		; <double> [#uses=1]
 	%tmp678 = fcmp oeq double %tmp632, %tmp675		; <i1> [#uses=1]
 	br i1 %tmp678, label %cond_true679, label %cond_false693
 
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 7b7ea6bcc493..3f17a5150fbe 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 184
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
 
 	%"struct.Adv5::Ekin<3>" = type <{ i8 }>
 	%"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index 56e949f832cc..bd5b71959442 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -11,7 +11,7 @@ bb.thread:
 	br label %bb52
 
 bb32:		; preds = %bb52
-	%0 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%0 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	%1 = add i32 %j.1, 1		; <i32> [#uses=1]
 	br label %bb52
 
@@ -29,14 +29,14 @@ bb53:		; preds = %bb52
 
 bb55:		; preds = %bb53
 	%4 = load double* @a, align 4		; <double> [#uses=10]
-	%5 = add double %4, 0.000000e+00		; <double> [#uses=16]
+	%5 = fadd double %4, 0.000000e+00		; <double> [#uses=16]
 	%6 = fcmp ogt double %k.4, 0.000000e+00		; <i1> [#uses=1]
-	%.pn404 = mul double %4, %4		; <double> [#uses=4]
-	%.pn402 = mul double %5, %5		; <double> [#uses=5]
+	%.pn404 = fmul double %4, %4		; <double> [#uses=4]
+	%.pn402 = fmul double %5, %5		; <double> [#uses=5]
 	%.pn165.in = load double* @N		; <double> [#uses=5]
-	%.pn198 = mul double 0.000000e+00, %5		; <double> [#uses=1]
-	%.pn185 = sub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%.pn147 = sub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%.pn198 = fmul double 0.000000e+00, %5		; <double> [#uses=1]
+	%.pn185 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%.pn147 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	%.pn141 = fdiv double 0.000000e+00, %4		; <double> [#uses=1]
 	%.pn142 = fdiv double 0.000000e+00, %5		; <double> [#uses=1]
 	%.pn136 = fdiv double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
@@ -47,178 +47,178 @@ bb55:		; preds = %bb53
 	%.pn117 = fdiv double 0.000000e+00, %4		; <double> [#uses=1]
 	%.pn118 = fdiv double %.pn185, %5		; <double> [#uses=1]
 	%.pn88 = fdiv double %.pn147, %5		; <double> [#uses=1]
-	%.pn81 = sub double %.pn141, %.pn142		; <double> [#uses=1]
-	%.pn77 = sub double 0.000000e+00, %.pn136		; <double> [#uses=1]
-	%.pn75 = sub double 0.000000e+00, %.pn132		; <double> [#uses=1]
-	%.pn69 = sub double %.pn123, %.pn124		; <double> [#uses=1]
-	%.pn67 = sub double 0.000000e+00, %.pn120		; <double> [#uses=1]
-	%.pn56 = sub double %.pn117, %.pn118		; <double> [#uses=1]
-	%.pn42 = sub double 0.000000e+00, %.pn88		; <double> [#uses=1]
-	%.pn60 = mul double %.pn81, 0.000000e+00		; <double> [#uses=1]
-	%.pn57 = add double %.pn77, 0.000000e+00		; <double> [#uses=1]
-	%.pn58 = mul double %.pn75, %.pn165.in		; <double> [#uses=1]
-	%.pn32 = add double %.pn69, 0.000000e+00		; <double> [#uses=1]
-	%.pn33 = mul double %.pn67, %.pn165.in		; <double> [#uses=1]
-	%.pn17 = sub double 0.000000e+00, %.pn60		; <double> [#uses=1]
-	%.pn9 = add double %.pn57, %.pn58		; <double> [#uses=1]
-	%.pn30 = mul double 0.000000e+00, %.pn56		; <double> [#uses=1]
-	%.pn24 = mul double 0.000000e+00, %.pn42		; <double> [#uses=1]
-	%.pn1 = add double %.pn32, %.pn33		; <double> [#uses=1]
-	%.pn28 = sub double %.pn30, 0.000000e+00		; <double> [#uses=1]
-	%.pn26 = add double %.pn28, 0.000000e+00		; <double> [#uses=1]
-	%.pn22 = sub double %.pn26, 0.000000e+00		; <double> [#uses=1]
-	%.pn20 = sub double %.pn24, 0.000000e+00		; <double> [#uses=1]
-	%.pn18 = add double %.pn22, 0.000000e+00		; <double> [#uses=1]
-	%.pn16 = add double %.pn20, 0.000000e+00		; <double> [#uses=1]
-	%.pn14 = sub double %.pn18, 0.000000e+00		; <double> [#uses=1]
-	%.pn12 = sub double %.pn16, %.pn17		; <double> [#uses=1]
-	%.pn10 = add double %.pn14, 0.000000e+00		; <double> [#uses=1]
-	%.pn8 = add double %.pn12, 0.000000e+00		; <double> [#uses=1]
-	%.pn6 = sub double %.pn10, 0.000000e+00		; <double> [#uses=1]
-	%.pn4 = sub double %.pn8, %.pn9		; <double> [#uses=1]
-	%.pn2 = add double %.pn6, 0.000000e+00		; <double> [#uses=1]
-	%.pn = add double %.pn4, 0.000000e+00		; <double> [#uses=1]
-	%N1.0 = sub double %.pn2, 0.000000e+00		; <double> [#uses=2]
-	%D1.0 = sub double %.pn, %.pn1		; <double> [#uses=2]
+	%.pn81 = fsub double %.pn141, %.pn142		; <double> [#uses=1]
+	%.pn77 = fsub double 0.000000e+00, %.pn136		; <double> [#uses=1]
+	%.pn75 = fsub double 0.000000e+00, %.pn132		; <double> [#uses=1]
+	%.pn69 = fsub double %.pn123, %.pn124		; <double> [#uses=1]
+	%.pn67 = fsub double 0.000000e+00, %.pn120		; <double> [#uses=1]
+	%.pn56 = fsub double %.pn117, %.pn118		; <double> [#uses=1]
+	%.pn42 = fsub double 0.000000e+00, %.pn88		; <double> [#uses=1]
+	%.pn60 = fmul double %.pn81, 0.000000e+00		; <double> [#uses=1]
+	%.pn57 = fadd double %.pn77, 0.000000e+00		; <double> [#uses=1]
+	%.pn58 = fmul double %.pn75, %.pn165.in		; <double> [#uses=1]
+	%.pn32 = fadd double %.pn69, 0.000000e+00		; <double> [#uses=1]
+	%.pn33 = fmul double %.pn67, %.pn165.in		; <double> [#uses=1]
+	%.pn17 = fsub double 0.000000e+00, %.pn60		; <double> [#uses=1]
+	%.pn9 = fadd double %.pn57, %.pn58		; <double> [#uses=1]
+	%.pn30 = fmul double 0.000000e+00, %.pn56		; <double> [#uses=1]
+	%.pn24 = fmul double 0.000000e+00, %.pn42		; <double> [#uses=1]
+	%.pn1 = fadd double %.pn32, %.pn33		; <double> [#uses=1]
+	%.pn28 = fsub double %.pn30, 0.000000e+00		; <double> [#uses=1]
+	%.pn26 = fadd double %.pn28, 0.000000e+00		; <double> [#uses=1]
+	%.pn22 = fsub double %.pn26, 0.000000e+00		; <double> [#uses=1]
+	%.pn20 = fsub double %.pn24, 0.000000e+00		; <double> [#uses=1]
+	%.pn18 = fadd double %.pn22, 0.000000e+00		; <double> [#uses=1]
+	%.pn16 = fadd double %.pn20, 0.000000e+00		; <double> [#uses=1]
+	%.pn14 = fsub double %.pn18, 0.000000e+00		; <double> [#uses=1]
+	%.pn12 = fsub double %.pn16, %.pn17		; <double> [#uses=1]
+	%.pn10 = fadd double %.pn14, 0.000000e+00		; <double> [#uses=1]
+	%.pn8 = fadd double %.pn12, 0.000000e+00		; <double> [#uses=1]
+	%.pn6 = fsub double %.pn10, 0.000000e+00		; <double> [#uses=1]
+	%.pn4 = fsub double %.pn8, %.pn9		; <double> [#uses=1]
+	%.pn2 = fadd double %.pn6, 0.000000e+00		; <double> [#uses=1]
+	%.pn = fadd double %.pn4, 0.000000e+00		; <double> [#uses=1]
+	%N1.0 = fsub double %.pn2, 0.000000e+00		; <double> [#uses=2]
+	%D1.0 = fsub double %.pn, %.pn1		; <double> [#uses=2]
 	br i1 %6, label %bb62, label %bb64
 
 bb62:		; preds = %bb55
-	%7 = mul double 0.000000e+00, %4		; <double> [#uses=1]
-	%8 = sub double -0.000000e+00, %7		; <double> [#uses=3]
-	%9 = mul double 0.000000e+00, %5		; <double> [#uses=1]
-	%10 = sub double -0.000000e+00, %9		; <double> [#uses=3]
-	%11 = mul double %.pn404, %4		; <double> [#uses=5]
-	%12 = mul double %.pn402, %5		; <double> [#uses=5]
-	%13 = mul double 0.000000e+00, -2.000000e+00		; <double> [#uses=1]
+	%7 = fmul double 0.000000e+00, %4		; <double> [#uses=1]
+	%8 = fsub double -0.000000e+00, %7		; <double> [#uses=3]
+	%9 = fmul double 0.000000e+00, %5		; <double> [#uses=1]
+	%10 = fsub double -0.000000e+00, %9		; <double> [#uses=3]
+	%11 = fmul double %.pn404, %4		; <double> [#uses=5]
+	%12 = fmul double %.pn402, %5		; <double> [#uses=5]
+	%13 = fmul double 0.000000e+00, -2.000000e+00		; <double> [#uses=1]
 	%14 = fdiv double 0.000000e+00, %.pn402		; <double> [#uses=1]
-	%15 = sub double 0.000000e+00, %14		; <double> [#uses=1]
-	%16 = mul double 0.000000e+00, %15		; <double> [#uses=1]
-	%17 = add double %13, %16		; <double> [#uses=1]
-	%18 = mul double %.pn165.in, -2.000000e+00		; <double> [#uses=5]
-	%19 = mul double %18, 0.000000e+00		; <double> [#uses=1]
-	%20 = add double %17, %19		; <double> [#uses=1]
-	%21 = mul double 0.000000e+00, %20		; <double> [#uses=1]
-	%22 = add double 0.000000e+00, %21		; <double> [#uses=1]
+	%15 = fsub double 0.000000e+00, %14		; <double> [#uses=1]
+	%16 = fmul double 0.000000e+00, %15		; <double> [#uses=1]
+	%17 = fadd double %13, %16		; <double> [#uses=1]
+	%18 = fmul double %.pn165.in, -2.000000e+00		; <double> [#uses=5]
+	%19 = fmul double %18, 0.000000e+00		; <double> [#uses=1]
+	%20 = fadd double %17, %19		; <double> [#uses=1]
+	%21 = fmul double 0.000000e+00, %20		; <double> [#uses=1]
+	%22 = fadd double 0.000000e+00, %21		; <double> [#uses=1]
 	%23 = fdiv double 0.000000e+00, %12		; <double> [#uses=1]
-	%24 = sub double 0.000000e+00, %23		; <double> [#uses=0]
-	%25 = mul double %18, 0.000000e+00		; <double> [#uses=1]
-	%26 = add double 0.000000e+00, %25		; <double> [#uses=1]
-	%27 = mul double 0.000000e+00, %26		; <double> [#uses=1]
-	%28 = sub double %22, %27		; <double> [#uses=1]
-	%29 = mul double %11, %4		; <double> [#uses=1]
-	%30 = mul double %12, %5		; <double> [#uses=3]
-	%31 = mul double %.pn165.in, -4.000000e+00		; <double> [#uses=1]
-	%32 = mul double %.pn165.in, 0x3FF5555555555555		; <double> [#uses=1]
-	%33 = mul double %32, 0.000000e+00		; <double> [#uses=2]
-	%34 = add double %28, 0.000000e+00		; <double> [#uses=1]
-	%35 = sub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%24 = fsub double 0.000000e+00, %23		; <double> [#uses=0]
+	%25 = fmul double %18, 0.000000e+00		; <double> [#uses=1]
+	%26 = fadd double 0.000000e+00, %25		; <double> [#uses=1]
+	%27 = fmul double 0.000000e+00, %26		; <double> [#uses=1]
+	%28 = fsub double %22, %27		; <double> [#uses=1]
+	%29 = fmul double %11, %4		; <double> [#uses=1]
+	%30 = fmul double %12, %5		; <double> [#uses=3]
+	%31 = fmul double %.pn165.in, -4.000000e+00		; <double> [#uses=1]
+	%32 = fmul double %.pn165.in, 0x3FF5555555555555		; <double> [#uses=1]
+	%33 = fmul double %32, 0.000000e+00		; <double> [#uses=2]
+	%34 = fadd double %28, 0.000000e+00		; <double> [#uses=1]
+	%35 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	%36 = fdiv double %35, %11		; <double> [#uses=1]
 	%37 = fdiv double 0.000000e+00, %12		; <double> [#uses=1]
-	%38 = sub double %36, %37		; <double> [#uses=1]
-	%39 = mul double 0.000000e+00, %38		; <double> [#uses=1]
-	%40 = add double 0.000000e+00, %39		; <double> [#uses=1]
-	%41 = add double %40, 0.000000e+00		; <double> [#uses=1]
-	%42 = add double %41, 0.000000e+00		; <double> [#uses=1]
-	%43 = mul double %42, 0.000000e+00		; <double> [#uses=1]
-	%44 = sub double %34, %43		; <double> [#uses=1]
+	%38 = fsub double %36, %37		; <double> [#uses=1]
+	%39 = fmul double 0.000000e+00, %38		; <double> [#uses=1]
+	%40 = fadd double 0.000000e+00, %39		; <double> [#uses=1]
+	%41 = fadd double %40, 0.000000e+00		; <double> [#uses=1]
+	%42 = fadd double %41, 0.000000e+00		; <double> [#uses=1]
+	%43 = fmul double %42, 0.000000e+00		; <double> [#uses=1]
+	%44 = fsub double %34, %43		; <double> [#uses=1]
 	%45 = tail call double @llvm.exp.f64(double %8) nounwind		; <double> [#uses=1]
-	%46 = sub double -0.000000e+00, %45		; <double> [#uses=2]
+	%46 = fsub double -0.000000e+00, %45		; <double> [#uses=2]
 	%47 = fdiv double %46, 0.000000e+00		; <double> [#uses=1]
-	%48 = mul double %30, %5		; <double> [#uses=1]
+	%48 = fmul double %30, %5		; <double> [#uses=1]
 	%49 = fdiv double 0.000000e+00, %48		; <double> [#uses=1]
-	%50 = sub double %47, %49		; <double> [#uses=1]
-	%51 = mul double %50, -4.000000e+00		; <double> [#uses=1]
-	%52 = add double %51, 0.000000e+00		; <double> [#uses=1]
+	%50 = fsub double %47, %49		; <double> [#uses=1]
+	%51 = fmul double %50, -4.000000e+00		; <double> [#uses=1]
+	%52 = fadd double %51, 0.000000e+00		; <double> [#uses=1]
 	%53 = fdiv double %46, %11		; <double> [#uses=1]
-	%54 = sub double %53, 0.000000e+00		; <double> [#uses=1]
-	%55 = mul double %31, %54		; <double> [#uses=1]
-	%56 = add double %52, %55		; <double> [#uses=1]
-	%57 = add double %56, 0.000000e+00		; <double> [#uses=1]
-	%58 = add double %44, %57		; <double> [#uses=1]
-	%59 = sub double %58, 0.000000e+00		; <double> [#uses=1]
+	%54 = fsub double %53, 0.000000e+00		; <double> [#uses=1]
+	%55 = fmul double %31, %54		; <double> [#uses=1]
+	%56 = fadd double %52, %55		; <double> [#uses=1]
+	%57 = fadd double %56, 0.000000e+00		; <double> [#uses=1]
+	%58 = fadd double %44, %57		; <double> [#uses=1]
+	%59 = fsub double %58, 0.000000e+00		; <double> [#uses=1]
 	%60 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind		; <double> [#uses=1]
-	%61 = sub double -0.000000e+00, %60		; <double> [#uses=1]
+	%61 = fsub double -0.000000e+00, %60		; <double> [#uses=1]
 	%62 = fdiv double 0.000000e+00, -6.000000e+00		; <double> [#uses=1]
 	%63 = fdiv double %61, %5		; <double> [#uses=1]
-	%64 = sub double 0.000000e+00, %63		; <double> [#uses=1]
-	%65 = mul double %62, %64		; <double> [#uses=1]
-	%66 = sub double 0.000000e+00, %65		; <double> [#uses=1]
-	%67 = sub double -0.000000e+00, 0.000000e+00		; <double> [#uses=2]
+	%64 = fsub double 0.000000e+00, %63		; <double> [#uses=1]
+	%65 = fmul double %62, %64		; <double> [#uses=1]
+	%66 = fsub double 0.000000e+00, %65		; <double> [#uses=1]
+	%67 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=2]
 	%68 = tail call double @llvm.exp.f64(double %10) nounwind		; <double> [#uses=1]
-	%69 = sub double -0.000000e+00, %68		; <double> [#uses=2]
+	%69 = fsub double -0.000000e+00, %68		; <double> [#uses=2]
 	%70 = fdiv double %67, %.pn404		; <double> [#uses=1]
 	%71 = fdiv double %69, %.pn402		; <double> [#uses=1]
-	%72 = sub double %70, %71		; <double> [#uses=1]
-	%73 = mul double %72, -5.000000e-01		; <double> [#uses=1]
+	%72 = fsub double %70, %71		; <double> [#uses=1]
+	%73 = fmul double %72, -5.000000e-01		; <double> [#uses=1]
 	%74 = fdiv double %67, %4		; <double> [#uses=1]
 	%75 = fdiv double %69, %5		; <double> [#uses=1]
-	%76 = sub double %74, %75		; <double> [#uses=1]
-	%77 = mul double %76, 0.000000e+00		; <double> [#uses=1]
-	%78 = add double %73, %77		; <double> [#uses=1]
-	%79 = mul double 0.000000e+00, %78		; <double> [#uses=1]
-	%80 = add double %66, %79		; <double> [#uses=1]
+	%76 = fsub double %74, %75		; <double> [#uses=1]
+	%77 = fmul double %76, 0.000000e+00		; <double> [#uses=1]
+	%78 = fadd double %73, %77		; <double> [#uses=1]
+	%79 = fmul double 0.000000e+00, %78		; <double> [#uses=1]
+	%80 = fadd double %66, %79		; <double> [#uses=1]
 	%81 = fdiv double 0.000000e+00, %.pn404		; <double> [#uses=1]
 	%82 = fdiv double 0.000000e+00, %.pn402		; <double> [#uses=1]
-	%83 = sub double %81, %82		; <double> [#uses=1]
-	%84 = mul double %83, -5.000000e-01		; <double> [#uses=1]
+	%83 = fsub double %81, %82		; <double> [#uses=1]
+	%84 = fmul double %83, -5.000000e-01		; <double> [#uses=1]
 	%85 = fdiv double 0.000000e+00, %4		; <double> [#uses=1]
 	%86 = fdiv double 0.000000e+00, %5		; <double> [#uses=1]
-	%87 = sub double %85, %86		; <double> [#uses=1]
-	%88 = mul double %87, 0.000000e+00		; <double> [#uses=1]
-	%89 = add double %84, %88		; <double> [#uses=1]
-	%90 = mul double 0.000000e+00, %89		; <double> [#uses=1]
-	%91 = sub double %80, %90		; <double> [#uses=1]
+	%87 = fsub double %85, %86		; <double> [#uses=1]
+	%88 = fmul double %87, 0.000000e+00		; <double> [#uses=1]
+	%89 = fadd double %84, %88		; <double> [#uses=1]
+	%90 = fmul double 0.000000e+00, %89		; <double> [#uses=1]
+	%91 = fsub double %80, %90		; <double> [#uses=1]
 	%92 = tail call double @llvm.exp.f64(double %8) nounwind		; <double> [#uses=1]
-	%93 = sub double -0.000000e+00, %92		; <double> [#uses=1]
+	%93 = fsub double -0.000000e+00, %92		; <double> [#uses=1]
 	%94 = tail call double @llvm.exp.f64(double %10) nounwind		; <double> [#uses=1]
-	%95 = sub double -0.000000e+00, %94		; <double> [#uses=3]
+	%95 = fsub double -0.000000e+00, %94		; <double> [#uses=3]
 	%96 = fdiv double %95, %.pn402		; <double> [#uses=1]
-	%97 = sub double 0.000000e+00, %96		; <double> [#uses=1]
-	%98 = mul double 0.000000e+00, %97		; <double> [#uses=1]
+	%97 = fsub double 0.000000e+00, %96		; <double> [#uses=1]
+	%98 = fmul double 0.000000e+00, %97		; <double> [#uses=1]
 	%99 = fdiv double %93, %11		; <double> [#uses=1]
 	%100 = fdiv double %95, %12		; <double> [#uses=1]
-	%101 = sub double %99, %100		; <double> [#uses=1]
-	%102 = sub double %98, %101		; <double> [#uses=1]
+	%101 = fsub double %99, %100		; <double> [#uses=1]
+	%102 = fsub double %98, %101		; <double> [#uses=1]
 	%103 = fdiv double %95, %5		; <double> [#uses=1]
-	%104 = sub double 0.000000e+00, %103		; <double> [#uses=1]
-	%105 = mul double %18, %104		; <double> [#uses=1]
-	%106 = add double %102, %105		; <double> [#uses=1]
-	%107 = mul double %106, %k.4		; <double> [#uses=1]
-	%108 = add double %91, %107		; <double> [#uses=1]
-	%109 = sub double %108, 0.000000e+00		; <double> [#uses=1]
+	%104 = fsub double 0.000000e+00, %103		; <double> [#uses=1]
+	%105 = fmul double %18, %104		; <double> [#uses=1]
+	%106 = fadd double %102, %105		; <double> [#uses=1]
+	%107 = fmul double %106, %k.4		; <double> [#uses=1]
+	%108 = fadd double %91, %107		; <double> [#uses=1]
+	%109 = fsub double %108, 0.000000e+00		; <double> [#uses=1]
 	%110 = tail call double @llvm.exp.f64(double %8) nounwind		; <double> [#uses=1]
-	%111 = sub double -0.000000e+00, %110		; <double> [#uses=2]
+	%111 = fsub double -0.000000e+00, %110		; <double> [#uses=2]
 	%112 = tail call double @llvm.exp.f64(double %10) nounwind		; <double> [#uses=1]
-	%113 = sub double -0.000000e+00, %112		; <double> [#uses=2]
+	%113 = fsub double -0.000000e+00, %112		; <double> [#uses=2]
 	%114 = fdiv double %111, %11		; <double> [#uses=1]
 	%115 = fdiv double %113, %12		; <double> [#uses=1]
-	%116 = sub double %114, %115		; <double> [#uses=1]
-	%117 = mul double 0.000000e+00, %116		; <double> [#uses=1]
+	%116 = fsub double %114, %115		; <double> [#uses=1]
+	%117 = fmul double 0.000000e+00, %116		; <double> [#uses=1]
 	%118 = fdiv double %111, %29		; <double> [#uses=1]
 	%119 = fdiv double %113, %30		; <double> [#uses=1]
-	%120 = sub double %118, %119		; <double> [#uses=1]
-	%121 = sub double %117, %120		; <double> [#uses=1]
-	%122 = mul double %18, 0.000000e+00		; <double> [#uses=1]
-	%123 = add double %121, %122		; <double> [#uses=1]
-	%124 = mul double %33, 0.000000e+00		; <double> [#uses=1]
-	%125 = add double %123, %124		; <double> [#uses=1]
-	%126 = add double %109, %125		; <double> [#uses=1]
+	%120 = fsub double %118, %119		; <double> [#uses=1]
+	%121 = fsub double %117, %120		; <double> [#uses=1]
+	%122 = fmul double %18, 0.000000e+00		; <double> [#uses=1]
+	%123 = fadd double %121, %122		; <double> [#uses=1]
+	%124 = fmul double %33, 0.000000e+00		; <double> [#uses=1]
+	%125 = fadd double %123, %124		; <double> [#uses=1]
+	%126 = fadd double %109, %125		; <double> [#uses=1]
 	%127 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind		; <double> [#uses=1]
-	%128 = sub double -0.000000e+00, %127		; <double> [#uses=2]
+	%128 = fsub double -0.000000e+00, %127		; <double> [#uses=2]
 	%129 = fdiv double %128, %30		; <double> [#uses=1]
-	%130 = sub double 0.000000e+00, %129		; <double> [#uses=1]
-	%131 = sub double 0.000000e+00, %130		; <double> [#uses=1]
+	%130 = fsub double 0.000000e+00, %129		; <double> [#uses=1]
+	%131 = fsub double 0.000000e+00, %130		; <double> [#uses=1]
 	%132 = fdiv double 0.000000e+00, %.pn404		; <double> [#uses=1]
-	%133 = sub double %132, 0.000000e+00		; <double> [#uses=1]
-	%134 = mul double %18, %133		; <double> [#uses=1]
-	%135 = add double %131, %134		; <double> [#uses=1]
+	%133 = fsub double %132, 0.000000e+00		; <double> [#uses=1]
+	%134 = fmul double %18, %133		; <double> [#uses=1]
+	%135 = fadd double %131, %134		; <double> [#uses=1]
 	%136 = fdiv double %128, %5		; <double> [#uses=1]
-	%137 = sub double 0.000000e+00, %136		; <double> [#uses=1]
-	%138 = mul double %33, %137		; <double> [#uses=1]
-	%139 = add double %135, %138		; <double> [#uses=1]
-	%140 = sub double %126, %139		; <double> [#uses=1]
-	%141 = add double %N1.0, %59		; <double> [#uses=1]
-	%142 = add double %D1.0, %140		; <double> [#uses=1]
+	%137 = fsub double 0.000000e+00, %136		; <double> [#uses=1]
+	%138 = fmul double %33, %137		; <double> [#uses=1]
+	%139 = fadd double %135, %138		; <double> [#uses=1]
+	%140 = fsub double %126, %139		; <double> [#uses=1]
+	%141 = fadd double %N1.0, %59		; <double> [#uses=1]
+	%142 = fadd double %D1.0, %140		; <double> [#uses=1]
 	br label %bb64
 
 bb64:		; preds = %bb62, %bb55
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 7556616f995d..399ed3081f20 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -26,39 +26,39 @@ entry:
 
 bb3:		; preds = %entry
 	%2 = fdiv double 1.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%3 = mul double 0.000000e+00, %2		; <double> [#uses=2]
+	%3 = fmul double 0.000000e+00, %2		; <double> [#uses=2]
 	%4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind		; <double> [#uses=1]
 	%5 = fdiv double 1.000000e+00, %4		; <double> [#uses=2]
-	%6 = mul double %3, %5		; <double> [#uses=2]
-	%7 = mul double 0.000000e+00, %5		; <double> [#uses=2]
-	%8 = mul double %3, %7		; <double> [#uses=1]
-	%9 = sub double %8, 0.000000e+00		; <double> [#uses=1]
-	%10 = mul double 0.000000e+00, %6		; <double> [#uses=1]
-	%11 = sub double 0.000000e+00, %10		; <double> [#uses=1]
-	%12 = sub double -0.000000e+00, %11		; <double> [#uses=1]
-	%13 = mul double %0, %0		; <double> [#uses=2]
-	%14 = sub double %13, 0.000000e+00		; <double> [#uses=1]
+	%6 = fmul double %3, %5		; <double> [#uses=2]
+	%7 = fmul double 0.000000e+00, %5		; <double> [#uses=2]
+	%8 = fmul double %3, %7		; <double> [#uses=1]
+	%9 = fsub double %8, 0.000000e+00		; <double> [#uses=1]
+	%10 = fmul double 0.000000e+00, %6		; <double> [#uses=1]
+	%11 = fsub double 0.000000e+00, %10		; <double> [#uses=1]
+	%12 = fsub double -0.000000e+00, %11		; <double> [#uses=1]
+	%13 = fmul double %0, %0		; <double> [#uses=2]
+	%14 = fsub double %13, 0.000000e+00		; <double> [#uses=1]
 	%15 = call double @llvm.sqrt.f64(double %14)		; <double> [#uses=1]
-	%16 = mul double 0.000000e+00, %15		; <double> [#uses=1]
+	%16 = fmul double 0.000000e+00, %15		; <double> [#uses=1]
 	%17 = fdiv double %16, %0		; <double> [#uses=1]
-	%18 = add double 0.000000e+00, %17		; <double> [#uses=1]
+	%18 = fadd double 0.000000e+00, %17		; <double> [#uses=1]
 	%19 = call double @acos(double %18) nounwind readonly		; <double> [#uses=1]
 	%20 = load double* null, align 4		; <double> [#uses=1]
-	%21 = mul double %20, 0x401921FB54442D18		; <double> [#uses=1]
+	%21 = fmul double %20, 0x401921FB54442D18		; <double> [#uses=1]
 	%22 = call double @sin(double %19) nounwind readonly		; <double> [#uses=2]
-	%23 = mul double %22, 0.000000e+00		; <double> [#uses=2]
-	%24 = mul double %6, %23		; <double> [#uses=1]
-	%25 = mul double %7, %23		; <double> [#uses=1]
+	%23 = fmul double %22, 0.000000e+00		; <double> [#uses=2]
+	%24 = fmul double %6, %23		; <double> [#uses=1]
+	%25 = fmul double %7, %23		; <double> [#uses=1]
 	%26 = call double @sin(double %21) nounwind readonly		; <double> [#uses=1]
-	%27 = mul double %22, %26		; <double> [#uses=2]
-	%28 = mul double %9, %27		; <double> [#uses=1]
-	%29 = mul double %27, %12		; <double> [#uses=1]
-	%30 = add double %24, %28		; <double> [#uses=1]
-	%31 = add double 0.000000e+00, %29		; <double> [#uses=1]
-	%32 = add double %25, 0.000000e+00		; <double> [#uses=1]
-	%33 = add double %30, 0.000000e+00		; <double> [#uses=1]
-	%34 = add double %31, 0.000000e+00		; <double> [#uses=1]
-	%35 = add double %32, 0.000000e+00		; <double> [#uses=1]
+	%27 = fmul double %22, %26		; <double> [#uses=2]
+	%28 = fmul double %9, %27		; <double> [#uses=1]
+	%29 = fmul double %27, %12		; <double> [#uses=1]
+	%30 = fadd double %24, %28		; <double> [#uses=1]
+	%31 = fadd double 0.000000e+00, %29		; <double> [#uses=1]
+	%32 = fadd double %25, 0.000000e+00		; <double> [#uses=1]
+	%33 = fadd double %30, 0.000000e+00		; <double> [#uses=1]
+	%34 = fadd double %31, 0.000000e+00		; <double> [#uses=1]
+	%35 = fadd double %32, 0.000000e+00		; <double> [#uses=1]
 	%36 = bitcast %struct.ggPoint3* %x to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
 	store double %33, double* null, align 8
@@ -68,9 +68,9 @@ bb5.i.i.i:		; preds = %bb3
 	unreachable
 
 _Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit:		; preds = %bb3
-	%37 = sub double %13, 0.000000e+00		; <double> [#uses=0]
-	%38 = sub double -0.000000e+00, %34		; <double> [#uses=0]
-	%39 = sub double -0.000000e+00, %35		; <double> [#uses=0]
+	%37 = fsub double %13, 0.000000e+00		; <double> [#uses=0]
+	%38 = fsub double -0.000000e+00, %34		; <double> [#uses=0]
+	%39 = fsub double -0.000000e+00, %35		; <double> [#uses=0]
 	ret i32 1
 
 bb7:		; preds = %entry
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index 9dc3b3485ec6..f394847362f9 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -4,8 +4,8 @@ define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %C
 entry:
 	%input2 = load <4 x float>* null, align 16		; <<4 x float>> [#uses=2]
 	%shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>		; <<4 x float>> [#uses=1]
-	%mul1 = mul <4 x float> %shuffle7, zeroinitializer		; <<4 x float>> [#uses=1]
-	%add2 = add <4 x float> %mul1, %input2		; <<4 x float>> [#uses=1]
+	%mul1 = fmul <4 x float> %shuffle7, zeroinitializer		; <<4 x float>> [#uses=1]
+	%add2 = fadd <4 x float> %mul1, %input2		; <<4 x float>> [#uses=1]
 	store <4 x float> %add2, <4 x float>* null, align 16
 	ret void
 }
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
new file mode 100644
index 000000000000..5eaae7aa9b46
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -0,0 +1,263 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+
+	%struct.anon = type { i16, i16 }
+	%struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
+	%struct.cab_file = type { i32, i16, i64, i8*, i32, i32, i32, %struct.cab_folder*, %struct.cab_file*, %struct.cab_archive*, %struct.cab_state* }
+	%struct.cab_folder = type { i16, i16, %struct.cab_archive*, i64, %struct.cab_folder* }
+	%struct.cab_state = type { i8*, i8*, [38912 x i8], i16, i16, i8*, i16 }
+	%struct.qtm_model = type { i32, i32, %struct.anon* }
+	%struct.qtm_stream = type { i32, i32, i8, i8*, i32, i32, i32, i16, i16, i16, i8, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i8, [42 x i32], [42 x i8], [27 x i8], [27 x i8], %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [25 x %struct.anon], [37 x %struct.anon], [43 x %struct.anon], [28 x %struct.anon], [8 x %struct.anon], %struct.cab_file*, i32 (%struct.cab_file*, i8*, i32)* }
+
+declare fastcc i32 @qtm_read_input(%struct.qtm_stream* nocapture) nounwind
+
+define fastcc i32 @qtm_decompress(%struct.qtm_stream* %qtm, i64 %out_bytes) nounwind {
+entry:
+	br i1 undef, label %bb245, label %bb3
+
+bb3:		; preds = %entry
+	br i1 undef, label %bb5, label %bb4
+
+bb4:		; preds = %bb3
+	ret i32 undef
+
+bb5:		; preds = %bb3
+	br i1 undef, label %bb245, label %bb14
+
+bb14:		; preds = %bb5
+	br label %bb238
+
+bb28:		; preds = %bb215
+	br label %bb31
+
+bb29:		; preds = %bb31
+	br i1 undef, label %bb31, label %bb32
+
+bb31:		; preds = %bb29, %bb28
+	br i1 undef, label %bb29, label %bb32
+
+bb32:		; preds = %bb31, %bb29
+	br label %bb33
+
+bb33:		; preds = %bb33, %bb32
+	br i1 undef, label %bb34, label %bb33
+
+bb34:		; preds = %bb33
+	br i1 undef, label %bb35, label %bb36
+
+bb35:		; preds = %bb34
+	br label %bb36
+
+bb36:		; preds = %bb46, %bb35, %bb34
+	br i1 undef, label %bb40, label %bb37
+
+bb37:		; preds = %bb36
+	br i1 undef, label %bb77, label %bb60
+
+bb40:		; preds = %bb36
+	br i1 undef, label %bb46, label %bb41
+
+bb41:		; preds = %bb40
+	br i1 undef, label %bb45, label %bb42
+
+bb42:		; preds = %bb41
+	ret i32 undef
+
+bb45:		; preds = %bb41
+	br label %bb46
+
+bb46:		; preds = %bb45, %bb40
+	br label %bb36
+
+bb60:		; preds = %bb60, %bb37
+	br label %bb60
+
+bb77:		; preds = %bb37
+	switch i32 undef, label %bb197 [
+		i32 5, label %bb108
+		i32 6, label %bb138
+	]
+
+bb108:		; preds = %bb77
+	br label %bb111
+
+bb109:		; preds = %bb111
+	br i1 undef, label %bb111, label %bb112
+
+bb111:		; preds = %bb109, %bb108
+	br i1 undef, label %bb109, label %bb112
+
+bb112:		; preds = %bb111, %bb109
+	br label %bb113
+
+bb113:		; preds = %bb113, %bb112
+	br i1 undef, label %bb114, label %bb113
+
+bb114:		; preds = %bb113
+	br i1 undef, label %bb115, label %bb116
+
+bb115:		; preds = %bb114
+	br label %bb116
+
+bb116:		; preds = %bb115, %bb114
+	br i1 undef, label %bb120, label %bb117
+
+bb117:		; preds = %bb116
+	br label %bb136
+
+bb120:		; preds = %bb116
+	ret i32 undef
+
+bb128:		; preds = %bb136
+	br i1 undef, label %bb134, label %bb129
+
+bb129:		; preds = %bb128
+	br i1 undef, label %bb133, label %bb130
+
+bb130:		; preds = %bb129
+	br i1 undef, label %bb132, label %bb131
+
+bb131:		; preds = %bb130
+	ret i32 undef
+
+bb132:		; preds = %bb130
+	br label %bb133
+
+bb133:		; preds = %bb132, %bb129
+	br label %bb134
+
+bb134:		; preds = %bb133, %bb128
+	br label %bb136
+
+bb136:		; preds = %bb134, %bb117
+	br i1 undef, label %bb198, label %bb128
+
+bb138:		; preds = %bb77
+	%0 = trunc i32 undef to i16		; <i16> [#uses=1]
+	br label %bb141
+
+bb139:		; preds = %bb141
+	%scevgep441442881 = load i16* undef		; <i16> [#uses=1]
+	%1 = icmp ugt i16 %scevgep441442881, %0		; <i1> [#uses=1]
+	br i1 %1, label %bb141, label %bb142
+
+bb141:		; preds = %bb139, %bb138
+	br i1 undef, label %bb139, label %bb142
+
+bb142:		; preds = %bb141, %bb139
+	br label %bb143
+
+bb143:		; preds = %bb143, %bb142
+	br i1 undef, label %bb144, label %bb143
+
+bb144:		; preds = %bb143
+	br i1 undef, label %bb145, label %bb146
+
+bb145:		; preds = %bb144
+	unreachable
+
+bb146:		; preds = %bb156, %bb144
+	br i1 undef, label %bb150, label %bb147
+
+bb147:		; preds = %bb146
+	br i1 undef, label %bb157, label %bb148
+
+bb148:		; preds = %bb147
+	br i1 undef, label %bb149, label %bb157
+
+bb149:		; preds = %bb148
+	br label %bb150
+
+bb150:		; preds = %bb149, %bb146
+	br i1 undef, label %bb156, label %bb152
+
+bb152:		; preds = %bb150
+	unreachable
+
+bb156:		; preds = %bb150
+	br label %bb146
+
+bb157:		; preds = %bb148, %bb147
+	br i1 undef, label %bb167, label %bb160
+
+bb160:		; preds = %bb157
+	ret i32 undef
+
+bb167:		; preds = %bb157
+	br label %bb170
+
+bb168:		; preds = %bb170
+	br i1 undef, label %bb170, label %bb171
+
+bb170:		; preds = %bb168, %bb167
+	br i1 undef, label %bb168, label %bb171
+
+bb171:		; preds = %bb170, %bb168
+	br label %bb172
+
+bb172:		; preds = %bb172, %bb171
+	br i1 undef, label %bb173, label %bb172
+
+bb173:		; preds = %bb172
+	br i1 undef, label %bb174, label %bb175
+
+bb174:		; preds = %bb173
+	unreachable
+
+bb175:		; preds = %bb179, %bb173
+	br i1 undef, label %bb179, label %bb176
+
+bb176:		; preds = %bb175
+	br i1 undef, label %bb186, label %bb177
+
+bb177:		; preds = %bb176
+	br i1 undef, label %bb178, label %bb186
+
+bb178:		; preds = %bb177
+	br label %bb179
+
+bb179:		; preds = %bb178, %bb175
+	br label %bb175
+
+bb186:		; preds = %bb177, %bb176
+	br label %bb195
+
+bb187:		; preds = %bb195
+	br i1 undef, label %bb193, label %bb189
+
+bb189:		; preds = %bb187
+	%2 = tail call fastcc i32 @qtm_read_input(%struct.qtm_stream* %qtm) nounwind		; <i32> [#uses=0]
+	ret i32 undef
+
+bb193:		; preds = %bb187
+	br label %bb195
+
+bb195:		; preds = %bb193, %bb186
+	br i1 undef, label %bb198, label %bb187
+
+bb197:		; preds = %bb77
+	ret i32 -124
+
+bb198:		; preds = %bb195, %bb136
+	br i1 undef, label %bb211.preheader, label %bb214
+
+bb211.preheader:		; preds = %bb198
+	br label %bb211
+
+bb211:		; preds = %bb211, %bb211.preheader
+	br i1 undef, label %bb214, label %bb211
+
+bb214:		; preds = %bb211, %bb198
+	br label %bb215
+
+bb215:		; preds = %bb238, %bb214
+	br i1 undef, label %bb28, label %bb216
+
+bb216:		; preds = %bb215
+	br label %bb238
+
+bb238:		; preds = %bb216, %bb14
+	br label %bb215
+
+bb245:		; preds = %bb5, %entry
+	ret i32 undef
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 3b499a4021c1..4f4091af4837 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -16,7 +16,7 @@ bb28.i:		; preds = %bb28.i, %entry
 	br i1 false, label %bb502.loopexit.i, label %bb28.i
 
 bb.nph53.i:		; preds = %bb502.loopexit.i
-	%tmp354.i = sub double -0.000000e+00, %tmp10.i4		; <double> [#uses=0]
+	%tmp354.i = fsub double -0.000000e+00, %tmp10.i4		; <double> [#uses=0]
 	br label %bb244.i
 
 bb244.i:		; preds = %bb244.i, %bb.nph53.i
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index d3038b9af76c..777a3d69a191 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -13,7 +13,7 @@ bb5:		; preds = %bb3
 	%u.in.mask = and i64 %x14, -4294967296		; <i64> [#uses=1]
 	%.ins = or i64 0, %u.in.mask		; <i64> [#uses=1]
 	%0 = bitcast i64 %.ins to double		; <double> [#uses=1]
-	%1 = sub double %x, %0		; <double> [#uses=1]
+	%1 = fsub double %x, %0		; <double> [#uses=1]
 	%2 = fptosi double %1 to i32		; <i32> [#uses=1]
 	%3 = add i32 %2, 0		; <i32> [#uses=1]
 	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 87a30c99e281..7bbda2d76d5d 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -4,8 +4,8 @@
 
 define double @t1(double %a, double %b) {
 entry:
-        %tmp2 = sub double -0.000000e+00, %a            ; <double> [#uses=1]
-        %tmp4 = mul double %tmp2, %b            ; <double> [#uses=1]
+        %tmp2 = fsub double -0.000000e+00, %a            ; <double> [#uses=1]
+        %tmp4 = fmul double %tmp2, %b            ; <double> [#uses=1]
         ret double %tmp4
 }
 
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 11933d5f70c9..568a6c41a0dd 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -10,49 +10,49 @@
 
 define float @f1(float %a, float %b) {
 entry:
-	%tmp = add float %a, %b		; <float> [#uses=1]
+	%tmp = fadd float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f2(double %a, double %b) {
 entry:
-	%tmp = add double %a, %b		; <double> [#uses=1]
+	%tmp = fadd double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f3(float %a, float %b) {
 entry:
-	%tmp = mul float %a, %b		; <float> [#uses=1]
+	%tmp = fmul float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f4(double %a, double %b) {
 entry:
-	%tmp = mul double %a, %b		; <double> [#uses=1]
+	%tmp = fmul double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f5(float %a, float %b) {
 entry:
-	%tmp = sub float %a, %b		; <float> [#uses=1]
+	%tmp = fsub float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f6(double %a, double %b) {
 entry:
-	%tmp = sub double %a, %b		; <double> [#uses=1]
+	%tmp = fsub double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f7(float %a) {
 entry:
-	%tmp1 = sub float -0.000000e+00, %a		; <float> [#uses=1]
+	%tmp1 = fsub float -0.000000e+00, %a		; <float> [#uses=1]
 	ret float %tmp1
 }
 
 define double @f8(double %a) {
 entry:
-	%tmp1 = sub double -0.000000e+00, %a		; <double> [#uses=1]
+	%tmp1 = fsub double -0.000000e+00, %a		; <double> [#uses=1]
 	ret double %tmp1
 }
 
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 48204ecdebf9..13653bbe6aa0 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -11,12 +11,12 @@ define float @f1(float %a) {
 
 define float @f2(float* %v, float %u) {
         %tmp = load float* %v           ; <float> [#uses=1]
-        %tmp1 = add float %tmp, %u              ; <float> [#uses=1]
+        %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
 }
 
 define void @f3(float %a, float %b, float* %v) {
-        %tmp = add float %a, %b         ; <float> [#uses=1]
+        %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
         ret void
 }
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index 79f9929b7df3..ad24eb5dad71 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -3,7 +3,7 @@
 define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
 {
   %h = load <8 x float>* %f
-  %i = mul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
+  %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
   %m = bitcast <8 x float> %i to <4 x i64>
   %z = load <4 x i64>* %y
   %n = mul <4 x i64> %z, %m
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 6db0d43e8343..02902f2debd3 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep -F {str r2, \[r0, +r3, lsl #2\]}
+; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]}
 ; Should use scaled addressing mode.
 
 define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 5d1beea5fc25..4bf0b4f6f3b1 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,9 +1,7 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia
 ; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb
 ; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh
-; This used to look for ldmia. But it's no longer lucky enough to
-; have the load / store instructions lined up just right after
-; scheduler change for pr3457. We'll look for a robust solution
-; later.
 
 	%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 @src = external global %struct.x
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 247465f33201..e9f194574e43 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,6 +1,5 @@
 ; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
 ; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
-; XFAIL: *
 
 @b = external global i64*
 
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 2acb33f9aebf..f58da4409356 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -39,10 +39,10 @@ define void @test_abs(float* %P, double* %D) {
 
 define void @test_add(float* %P, double* %D) {
 	%a = load float* %P		; <float> [#uses=2]
-	%b = add float %a, %a		; <float> [#uses=1]
+	%b = fadd float %a, %a		; <float> [#uses=1]
 	store float %b, float* %P
 	%A = load double* %D		; <double> [#uses=2]
-	%B = add double %A, %A		; <double> [#uses=1]
+	%B = fadd double %A, %A		; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
 }
@@ -61,8 +61,8 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 	%a1 = load float* %P1		; <float> [#uses=1]
 	%a2 = load float* %P2		; <float> [#uses=1]
 	%a3 = load float* %P3		; <float> [#uses=1]
-	%X = mul float %a1, %a2		; <float> [#uses=1]
-	%Y = sub float %X, %a3		; <float> [#uses=1]
+	%X = fmul float %a1, %a2		; <float> [#uses=1]
+	%Y = fsub float %X, %a3		; <float> [#uses=1]
 	store float %Y, float* %P1
 	ret void
 }
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
index ca4e48eb5ea4..f8393a3fbc80 100644
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -23,7 +23,7 @@ define double @test4(i64 %L) {
 
 define double @test5(double %D) {
         %X = bitcast double %D to double                ; <double> [#uses=1]
-        %Y = add double %X, 2.000000e+00                ; <double> [#uses=1]
+        %Y = fadd double %X, 2.000000e+00                ; <double> [#uses=1]
         %Z = bitcast double %Y to i64           ; <i64> [#uses=1]
         %res = bitcast i64 %Z to double         ; <double> [#uses=1]
         ret double %res
@@ -31,7 +31,7 @@ define double @test5(double %D) {
 
 define float @test6(float %F) {
         %X = bitcast float %F to float          ; <float> [#uses=1]
-        %Y = add float %X, 2.000000e+00         ; <float> [#uses=1]
+        %Y = fadd float %X, 2.000000e+00         ; <float> [#uses=1]
         %Z = bitcast float %Y to i32            ; <i32> [#uses=1]
         %res = bitcast i32 %Z to float          ; <float> [#uses=1]
         ret float %res
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
index afcac99a76df..32d635ad720e 100644
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -20,7 +20,7 @@ entry:
 	br label %bb4
 
 bb4:		; preds = %bb5.split, %bb4, %entry
-	%0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 mul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 mul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128))		; <i1> [#uses=1]
+	%0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128))		; <i1> [#uses=1]
 	br i1 %0, label %bb4, label %bb5.split
 
 bb5.split:		; preds = %bb4
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
new file mode 100644
index 000000000000..68849b20c769
--- /dev/null
+++ b/test/CodeGen/CBackend/fneg.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=c
+
+define void @func() nounwind {
+  entry:
+  %0 = fsub double -0.0, undef
+  ret void
+}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
index de78975491ef..d01e99288f7e 100644
--- a/test/CodeGen/CBackend/vectors.ll
+++ b/test/CodeGen/CBackend/vectors.ll
@@ -14,7 +14,7 @@ define i32 @test2(<4 x i32> %a, i32 %b) {
 }
 
 define <4 x float> @test3(<4 x float> %Y) {
-	%Z = add <4 x float> %Y, %Y
+	%Z = fadd <4 x float> %Y, %Y
 	%X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
 	ret <4 x float> %X
 }
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index 2579a404eea5..d4802ae8f545 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -11,88 +11,88 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
 target triple = "spu"
 
 define double @fadd(double %arg1, double %arg2) {
-        %A = add double %arg1, %arg2
+        %A = fadd double %arg1, %arg2
         ret double %A
 }
 
 define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
-        %A = add <2 x double> %arg1, %arg2
+        %A = fadd <2 x double> %arg1, %arg2
         ret <2 x double> %A
 }
 
 define double @fsub(double %arg1, double %arg2) {
-        %A = sub double %arg1,  %arg2
+        %A = fsub double %arg1,  %arg2
         ret double %A
 }
 
 define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
-        %A = sub <2 x double> %arg1,  %arg2
+        %A = fsub <2 x double> %arg1,  %arg2
         ret <2 x double> %A
 }
 
 define double @fmul(double %arg1, double %arg2) {
-        %A = mul double %arg1,  %arg2
+        %A = fmul double %arg1,  %arg2
         ret double %A
 }
 
 define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
-        %A = mul <2 x double> %arg1,  %arg2
+        %A = fmul <2 x double> %arg1,  %arg2
         ret <2 x double> %A
 }
 
 define double @fma(double %arg1, double %arg2, double %arg3) {
-        %A = mul double %arg1,  %arg2
-        %B = add double %A, %arg3
+        %A = fmul double %arg1,  %arg2
+        %B = fadd double %A, %arg3
         ret double %B
 }
 
 define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
-        %A = mul <2 x double> %arg1,  %arg2
-        %B = add <2 x double> %A, %arg3
+        %A = fmul <2 x double> %arg1,  %arg2
+        %B = fadd <2 x double> %A, %arg3
         ret <2 x double> %B
 }
 
 define double @fms(double %arg1, double %arg2, double %arg3) {
-        %A = mul double %arg1,  %arg2
-        %B = sub double %A, %arg3
+        %A = fmul double %arg1,  %arg2
+        %B = fsub double %A, %arg3
         ret double %B
 }
 
 define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
-        %A = mul <2 x double> %arg1,  %arg2
-        %B = sub <2 x double> %A, %arg3
+        %A = fmul <2 x double> %arg1,  %arg2
+        %B = fsub <2 x double> %A, %arg3
         ret <2 x double> %B
 }
 
 ; - (a * b - c)
 define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
-        %A = mul double %arg1,  %arg2
-        %B = sub double %A, %arg3
-        %C = sub double -0.000000e+00, %B               ; <double> [#uses=1]
+        %A = fmul double %arg1,  %arg2
+        %B = fsub double %A, %arg3
+        %C = fsub double -0.000000e+00, %B               ; <double> [#uses=1]
         ret double %C
 }
 
 ; Annother way of getting fnms
 ; - ( a * b ) + c => c - (a * b)
 define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
-        %A = mul double %arg1,  %arg2
-        %B = sub double %arg3, %A
+        %A = fmul double %arg1,  %arg2
+        %B = fsub double %arg3, %A
         ret double %B
 }
 
 ; FNMS: - (a * b - c) => c - (a * b)
 define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
-        %A = mul <2 x double> %arg1,  %arg2
-        %B = sub <2 x double> %arg3, %A ;
+        %A = fmul <2 x double> %arg1,  %arg2
+        %B = fsub <2 x double> %arg3, %A ;
         ret <2 x double> %B
 }
 
 ; Another way to get fnms using a constant vector
 ; - ( a * b - c)
 define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
-        %A = mul <2 x double> %arg1,  %arg2     ; <<2 x double>> [#uses=1]
-        %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
-        %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
+        %A = fmul <2 x double> %arg1,  %arg2     ; <<2 x double>> [#uses=1]
+        %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
+        %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
         ret <2 x double> %C
 }
 
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 4c6fbb95a39f..5bd66f4aaef3 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -7,22 +7,22 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
 target triple = "spu"
 
 define double @fneg_dp(double %X) {
-        %Y = sub double -0.000000e+00, %X
+        %Y = fsub double -0.000000e+00, %X
         ret double %Y
 }
 
 define <2 x double> @fneg_dp_vec(<2 x double> %X) {
-        %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
+        %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
         ret <2 x double> %Y
 }
 
 define float @fneg_sp(float %X) {
-        %Y = sub float -0.000000e+00, %X
+        %Y = fsub float -0.000000e+00, %X
         ret float %Y
 }
 
 define <4 x float> @fneg_sp_vec(<4 x float> %X) {
-        %Y = sub <4 x float> <float -0.000000e+00, float -0.000000e+00,
+        %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00,
                               float -0.000000e+00, float -0.000000e+00>, %X
         ret <4 x float> %Y
 }
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index df3baef85c9d..d77dd9216cd7 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -12,79 +12,79 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i
 target triple = "spu"
 
 define float @fp_add(float %arg1, float %arg2) {
-        %A = add float %arg1, %arg2     ; <float> [#uses=1]
+        %A = fadd float %arg1, %arg2     ; <float> [#uses=1]
         ret float %A
 }
 
 define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
-        %A = add <4 x float> %arg1, %arg2       ; <<4 x float>> [#uses=1]
+        %A = fadd <4 x float> %arg1, %arg2       ; <<4 x float>> [#uses=1]
         ret <4 x float> %A
 }
 
 define float @fp_sub(float %arg1, float %arg2) {
-        %A = sub float %arg1,  %arg2    ; <float> [#uses=1]
+        %A = fsub float %arg1,  %arg2    ; <float> [#uses=1]
         ret float %A
 }
 
 define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
-        %A = sub <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
+        %A = fsub <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
         ret <4 x float> %A
 }
 
 define float @fp_mul(float %arg1, float %arg2) {
-        %A = mul float %arg1,  %arg2    ; <float> [#uses=1]
+        %A = fmul float %arg1,  %arg2    ; <float> [#uses=1]
         ret float %A
 }
 
 define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
-        %A = mul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
+        %A = fmul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
         ret <4 x float> %A
 }
 
 define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
-        %A = mul float %arg1,  %arg2    ; <float> [#uses=1]
-        %B = add float %A, %arg3        ; <float> [#uses=1]
+        %A = fmul float %arg1,  %arg2    ; <float> [#uses=1]
+        %B = fadd float %A, %arg3        ; <float> [#uses=1]
         ret float %B
 }
 
 define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
-        %A = mul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
-        %B = add <4 x float> %A, %arg3  ; <<4 x float>> [#uses=1]
+        %A = fmul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
+        %B = fadd <4 x float> %A, %arg3  ; <<4 x float>> [#uses=1]
         ret <4 x float> %B
 }
 
 define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
-        %A = mul float %arg1,  %arg2    ; <float> [#uses=1]
-        %B = sub float %A, %arg3        ; <float> [#uses=1]
+        %A = fmul float %arg1,  %arg2    ; <float> [#uses=1]
+        %B = fsub float %A, %arg3        ; <float> [#uses=1]
         ret float %B
 }
 
 define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
-        %A = mul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
-        %B = sub <4 x float> %A, %arg3  ; <<4 x float>> [#uses=1]
+        %A = fmul <4 x float> %arg1,  %arg2      ; <<4 x float>> [#uses=1]
+        %B = fsub <4 x float> %A, %arg3  ; <<4 x float>> [#uses=1]
         ret <4 x float> %B
 }
 
 ; Test the straightforward way of getting fnms
 ; c - a * b
 define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
-        %A = mul float %arg1,  %arg2
-        %B = sub float %arg3, %A
+        %A = fmul float %arg1,  %arg2
+        %B = fsub float %arg3, %A
         ret float %B
 }
 
 ; Test another way of getting fnms
 ; - ( a *b -c ) = c - a * b
 define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
-        %A = mul float %arg1,  %arg2
-        %B = sub float %A, %arg3 
-        %C = sub float -0.0, %B
+        %A = fmul float %arg1,  %arg2
+        %B = fsub float %A, %arg3
+        %C = fsub float -0.0, %B
         ret float %C
 }
 
 define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
-        %A = mul <4 x float> %arg1,  %arg2
-        %B = sub <4 x float> %A, %arg3
-        %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
+        %A = fmul <4 x float> %arg1,  %arg2
+        %B = fsub <4 x float> %A, %arg3
+        %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
         ret <4 x float> %D
 }
diff --git a/test/CodeGen/Generic/2006-07-03-schedulers.ll b/test/CodeGen/Generic/2006-07-03-schedulers.ll
index 4c4481ccad8c..597ee56609ba 100644
--- a/test/CodeGen/Generic/2006-07-03-schedulers.ll
+++ b/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -12,13 +12,13 @@ define i32 @testissue(i32 %i, float %x, float %y) {
 	br label %bb1
 
 bb1:		; preds = %bb1, %0
-	%x1 = mul float %x, %y		; <float> [#uses=1]
-	%y1 = mul float %y, 7.500000e-01		; <float> [#uses=1]
-	%z1 = add float %x1, %y1		; <float> [#uses=1]
-	%x2 = mul float %x, 5.000000e-01		; <float> [#uses=1]
-	%y2 = mul float %y, 0x3FECCCCCC0000000		; <float> [#uses=1]
-	%z2 = add float %x2, %y2		; <float> [#uses=1]
-	%z3 = add float %z1, %z2		; <float> [#uses=1]
+	%x1 = fmul float %x, %y		; <float> [#uses=1]
+	%y1 = fmul float %y, 7.500000e-01		; <float> [#uses=1]
+	%z1 = fadd float %x1, %y1		; <float> [#uses=1]
+	%x2 = fmul float %x, 5.000000e-01		; <float> [#uses=1]
+	%y2 = fmul float %y, 0x3FECCCCCC0000000		; <float> [#uses=1]
+	%z2 = fadd float %x2, %y2		; <float> [#uses=1]
+	%z3 = fadd float %z1, %z2		; <float> [#uses=1]
 	%i1 = shl i32 %i, 3		; <i32> [#uses=1]
 	%j1 = add i32 %i, 7		; <i32> [#uses=1]
 	%m1 = add i32 %i1, %j1		; <i32> [#uses=2]
diff --git a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
index 74957952fe7b..a61108a0012a 100644
--- a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
+++ b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -71,10 +71,10 @@ cond_next159.i:		; preds = %cond_true356.i.preheader
 	%tmp178.i = add i32 %tmp116117.i, -128		; <i32> [#uses=2]
 	%tmp181.i = mul i32 %tmp178.i, %tmp178.i		; <i32> [#uses=1]
 	%tmp181182.i = sitofp i32 %tmp181.i to float		; <float> [#uses=1]
-	%tmp199200.pn.in.i = mul float %tmp181182.i, 0.000000e+00		; <float> [#uses=1]
+	%tmp199200.pn.in.i = fmul float %tmp181182.i, 0.000000e+00		; <float> [#uses=1]
 	%tmp199200.pn.i = fpext float %tmp199200.pn.in.i to double		; <double> [#uses=1]
-	%tmp201.pn.i = sub double 1.000000e+00, %tmp199200.pn.i		; <double> [#uses=1]
-	%factor.2.in.i = mul double 0.000000e+00, %tmp201.pn.i		; <double> [#uses=1]
+	%tmp201.pn.i = fsub double 1.000000e+00, %tmp199200.pn.i		; <double> [#uses=1]
+	%factor.2.in.i = fmul double 0.000000e+00, %tmp201.pn.i		; <double> [#uses=1]
 	%factor.2.i = fptrunc double %factor.2.in.i to float		; <float> [#uses=1]
 	br i1 false, label %cond_next312.i, label %cond_false222.i
 
diff --git a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
index 1cf822b71190..9acb852bced0 100644
--- a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
+++ b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -5,7 +5,7 @@ entry:
 	br label %bb15
 
 bb15:		; preds = %bb15, %entry
-	%tmp21 = add <8 x double> zeroinitializer, zeroinitializer		; <<8 x double>> [#uses=1]
+	%tmp21 = fadd <8 x double> zeroinitializer, zeroinitializer		; <<8 x double>> [#uses=1]
 	br i1 false, label %bb30, label %bb15
 
 bb30:		; preds = %bb15
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index e5a52748464b..01693079a3ef 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -5,8 +5,8 @@ define void @test() {
 entry:
 	%tmp98 = load float* null, align 4		; <float> [#uses=1]
 	%tmp106 = load float* null, align 4		; <float> [#uses=1]
-	%tmp113 = add float %tmp98, %tmp106		; <float> [#uses=1]
-	%tmp119 = sub float %tmp113, 0.000000e+00		; <float> [#uses=1]
+	%tmp113 = fadd float %tmp98, %tmp106		; <float> [#uses=1]
+	%tmp119 = fsub float %tmp113, 0.000000e+00		; <float> [#uses=1]
 	call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind 
 	ret void
 }
diff --git a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
index 7fe19d925bed..b2112f3ad39b 100644
--- a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
+++ b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -30,16 +30,16 @@ bb.nph1770:		; preds = %bb429
 	br i1 false, label %bb471, label %bb505
 
 bb471:		; preds = %bb471, %bb.nph1770
-	%tmp487 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%tmp487 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	br i1 false, label %bb505, label %bb471
 
 bb505:		; preds = %bb471, %bb.nph1770
 	%xy.0.lcssa = phi double [ 0.000000e+00, %bb.nph1770 ], [ %tmp487, %bb471 ]		; <double> [#uses=1]
-	%tmp507 = sub double -0.000000e+00, %xy.0.lcssa		; <double> [#uses=1]
+	%tmp507 = fsub double -0.000000e+00, %xy.0.lcssa		; <double> [#uses=1]
 	%tmp509 = fdiv double %tmp507, 0.000000e+00		; <double> [#uses=1]
-	%tmp510 = mul double %tmp509, 1.024000e+03		; <double> [#uses=1]
+	%tmp510 = fmul double %tmp509, 1.024000e+03		; <double> [#uses=1]
 	%tmp516 = fdiv double %tmp510, 0.000000e+00		; <double> [#uses=1]
-	%tmp517 = add double %tmp516, 5.000000e-01		; <double> [#uses=1]
+	%tmp517 = fadd double %tmp516, 5.000000e-01		; <double> [#uses=1]
 	%tmp518 = tail call double @floor( double %tmp517 ) nounwind readnone 		; <double> [#uses=0]
 	ret i32 0
 
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
new file mode 100644
index 000000000000..59e7d0c7a8f5
--- /dev/null
+++ b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc
+; PR4317
+
+declare i32 @b()
+
+define void @a() {
+entry:
+  ret void
+
+dummy:
+  invoke i32 @b() to label %reg unwind label %reg
+
+reg:
+  ret void
+}
diff --git a/test/CodeGen/Generic/fneg-fabs.ll b/test/CodeGen/Generic/fneg-fabs.ll
index f9580b1f309b..2709fa1afd68 100644
--- a/test/CodeGen/Generic/fneg-fabs.ll
+++ b/test/CodeGen/Generic/fneg-fabs.ll
@@ -1,12 +1,12 @@
 ; RUN: llvm-as < %s | llc
 
 define double @fneg(double %X) {
-        %Y = sub double -0.000000e+00, %X               ; <double> [#uses=1]
+        %Y = fsub double -0.000000e+00, %X               ; <double> [#uses=1]
         ret double %Y
 }
 
 define float @fnegf(float %X) {
-        %Y = sub float -0.000000e+00, %X                ; <float> [#uses=1]
+        %Y = fsub float -0.000000e+00, %X                ; <float> [#uses=1]
         ret float %Y
 }
 
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index 87aa1a0ba9f7..1e27061941a4 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -24,9 +24,9 @@ define i32 @main() {
 	%b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0		; <i8*> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %a_s, double %a )		; <i32>:1 [#uses=0]
 	call i32 (i8*, ...)* @printf( i8* %b_s, double %b )		; <i32>:2 [#uses=0]
-	%add_r = add double %a, %b		; <double> [#uses=1]
-	%sub_r = sub double %a, %b		; <double> [#uses=1]
-	%mul_r = mul double %a, %b		; <double> [#uses=1]
+	%add_r = fadd double %a, %b		; <double> [#uses=1]
+	%sub_r = fsub double %a, %b		; <double> [#uses=1]
+	%mul_r = fmul double %a, %b		; <double> [#uses=1]
 	%div_r = fdiv double %b, %a		; <double> [#uses=1]
 	%rem_r = frem double %b, %a		; <double> [#uses=1]
 	%add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0		; <i8*> [#uses=1]
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index fc573f3282ce..a532703d9417 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -9,8 +9,8 @@ define void @testConsts(i32 %N, float %X) {
         %a = add i32 %N, 1              ; <i32> [#uses=0]
         %i = add i32 %N, 12345678               ; <i32> [#uses=0]
         %b = add i16 4, 3               ; <i16> [#uses=0]
-        %c = add float %X, 0.000000e+00         ; <float> [#uses=0]
-        %d = add float %X, 0x400921CAC0000000           ; <float> [#uses=0]
+        %c = fadd float %X, 0.000000e+00         ; <float> [#uses=0]
+        %d = fadd float %X, 0x400921CAC0000000           ; <float> [#uses=0]
         %f = add i32 -1, 10             ; <i32> [#uses=0]
         %g = add i16 20, -1             ; <i16> [#uses=0]
         %j = add i16 -1, 30             ; <i16> [#uses=0]
@@ -126,8 +126,8 @@ define void @testfloatbool(float %x, float %y) {
         br label %Top
 
 Top:            ; preds = %Top, %0
-        %p = add float %x, %y           ; <float> [#uses=1]
-        %z = sub float %x, %y           ; <float> [#uses=1]
+        %p = fadd float %x, %y           ; <float> [#uses=1]
+        %z = fsub float %x, %y           ; <float> [#uses=1]
         %b = fcmp ole float %p, %z              ; <i1> [#uses=2]
         %c = xor i1 %b, true            ; <i1> [#uses=0]
         br i1 %b, label %Top, label %goon
diff --git a/test/CodeGen/Generic/storetrunc-fp.ll b/test/CodeGen/Generic/storetrunc-fp.ll
index 710a9907e018..0f7bb0b85ee9 100644
--- a/test/CodeGen/Generic/storetrunc-fp.ll
+++ b/test/CodeGen/Generic/storetrunc-fp.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | llc
 
 define void @foo(double %a, double %b, float* %fp) {
-	%c = add double %a, %b
+	%c = fadd double %a, %b
 	%d = fptrunc double %c to float
 	store float %d, float* %fp
 	ret void
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index a3124924638f..44601d0c4024 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -4,7 +4,7 @@
 define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
   %p = load %f8* %P
   %q = load %f8* %Q
-  %R = add %f8 %p, %q
+  %R = fadd %f8 %p, %q
   store %f8 %R, %f8 *%S
   ret void
 }
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index 910575750e3e..f283256d10ac 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -14,7 +14,7 @@
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
 	%p = load %f1* %P		; <%f1> [#uses=1]
 	%q = load %f1* %Q		; <%f1> [#uses=1]
-	%R = add %f1 %p, %q		; <%f1> [#uses=1]
+	%R = fadd %f1 %p, %q		; <%f1> [#uses=1]
 	store %f1 %R, %f1* %S
 	ret void
 }
@@ -22,7 +22,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 	%p = load %f2* %P		; <%f2> [#uses=1]
 	%q = load %f2* %Q		; <%f2> [#uses=1]
-	%R = add %f2 %p, %q		; <%f2> [#uses=1]
+	%R = fadd %f2 %p, %q		; <%f2> [#uses=1]
 	store %f2 %R, %f2* %S
 	ret void
 }
@@ -30,7 +30,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
 	%p = load %f4* %P		; <%f4> [#uses=1]
 	%q = load %f4* %Q		; <%f4> [#uses=1]
-	%R = add %f4 %p, %q		; <%f4> [#uses=1]
+	%R = fadd %f4 %p, %q		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
@@ -38,7 +38,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
 	%p = load %f8* %P		; <%f8> [#uses=1]
 	%q = load %f8* %Q		; <%f8> [#uses=1]
-	%R = add %f8 %p, %q		; <%f8> [#uses=1]
+	%R = fadd %f8 %p, %q		; <%f8> [#uses=1]
 	store %f8 %R, %f8* %S
 	ret void
 }
@@ -46,7 +46,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
 	%p = load %f8* %P		; <%f8> [#uses=1]
 	%q = load %f8* %Q		; <%f8> [#uses=1]
-	%R = mul %f8 %p, %q		; <%f8> [#uses=1]
+	%R = fmul %f8 %p, %q		; <%f8> [#uses=1]
 	store %f8 %R, %f8* %S
 	ret void
 }
@@ -64,21 +64,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
 
 define void @test_cst(%f4* %P, %f4* %S) {
 	%p = load %f4* %P		; <%f4> [#uses=1]
-	%R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >		; <%f4> [#uses=1]
+	%R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
 
 define void @test_zero(%f4* %P, %f4* %S) {
 	%p = load %f4* %P		; <%f4> [#uses=1]
-	%R = add %f4 %p, zeroinitializer		; <%f4> [#uses=1]
+	%R = fadd %f4 %p, zeroinitializer		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
 	%p = load %f4* %P		; <%f4> [#uses=1]
-	%R = add %f4 %p, undef		; <%f4> [#uses=1]
+	%R = fadd %f4 %p, undef		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
@@ -115,7 +115,7 @@ define double @test_extract_elt2(%d8* %P) {
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
 	%tmp = load %f4* %b		; <%f4> [#uses=1]
-	%tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >		; <%f4> [#uses=1]
+	%tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >		; <%f4> [#uses=1]
 	%tmp3 = bitcast %f4 %tmp2 to %i4		; <%i4> [#uses=1]
 	%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >		; <%i4> [#uses=1]
 	store %i4 %tmp4, %i4* %a
@@ -137,7 +137,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
 	%tmp4 = insertelement %f4 %tmp2, float %X, i32 2		; <%f4> [#uses=1]
 	%tmp6 = insertelement %f4 %tmp4, float %X, i32 3		; <%f4> [#uses=1]
 	%q = load %f4* %Q		; <%f4> [#uses=1]
-	%R = add %f4 %q, %tmp6		; <%f4> [#uses=1]
+	%R = fadd %f4 %q, %tmp6		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %P
 	ret void
 }
diff --git a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
index 20050e93ac9b..70f1d996e7ca 100644
--- a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
+++ b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -2,7 +2,7 @@
 
 define i16 @test(double %d) nounwind {
 entry:
-        %add = add double %d, 1.000000e+00
+        %add = fadd double %d, 1.000000e+00
         %call = tail call i16 @funct(double %add) nounwind
         ret i16 %call
 }
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index 95792ff15719..f8eca85efafb 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -5,6 +5,6 @@ target triple = "mipsallegrexel-psp-elf"
 
 define double @dofloat(double %a, double %b) nounwind {
 entry:
-	add double %a, %b		; <double>:0 [#uses=1]
+	fadd double %a, %b		; <double>:0 [#uses=1]
 	ret double %0
 }
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 99eccf500db5..2af7ab17c2cd 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -6,7 +6,7 @@ target triple = "mipsallegrexel-psp-elf"
 
 define float @F(float %a) nounwind {
 entry:
-	add float %a, 0x4011333340000000		; <float>:0 [#uses=1]
-	add float %0, 0x4010666660000000		; <float>:1 [#uses=1]
+	fadd float %a, 0x4011333340000000		; <float>:0 [#uses=1]
+	fadd float %0, 0x4010666660000000		; <float>:1 [#uses=1]
 	ret float %1
 }
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 7bc1f42d10a4..4580215b38f6 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -11,7 +11,7 @@ entry:
 	br i1 %0, label %bb, label %bb2
 
 bb:		; preds = %entry
-	add float %a, 1.000000e+00		; <float>:1 [#uses=1]
+	fadd float %a, 1.000000e+00		; <float>:1 [#uses=1]
 	ret float %1
 
 bb2:		; preds = %entry
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 8495bfe11290..9d18f47bce20 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -9,7 +9,7 @@ define double @A(double %c, double %d) nounwind readnone  {
 entry:
 	tail call double @fabs( double %c ) nounwind readnone 		; <double>:0 [#uses=1]
 	tail call double @fabs( double %d ) nounwind readnone 		; <double>:0 [#uses=1]
-  add double %0, %1
+  fadd double %0, %1
   ret double %2
 }
 
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index c9ee2cf81a0e..1f7440afedd6 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -21,12 +21,12 @@ entry:
 	load i16* %3, align 2		; <i16>:4 [#uses=1]
 	uitofp i16 %4 to double		; <double>:5 [#uses=1]
 	tail call double @ldexp( double %5, i32 -32 ) nounwind		; <double>:6 [#uses=1]
-	add double %2, %6		; <double>:7 [#uses=1]
+	fadd double %2, %6		; <double>:7 [#uses=1]
 	getelementptr i16* %xseed, i32 2		; <i16*>:8 [#uses=1]
 	load i16* %8, align 2		; <i16>:9 [#uses=1]
 	uitofp i16 %9 to double		; <double>:10 [#uses=1]
 	tail call double @ldexp( double %10, i32 -16 ) nounwind		; <double>:11 [#uses=1]
-	add double %7, %11		; <double>:12 [#uses=1]
+	fadd double %7, %11		; <double>:12 [#uses=1]
 	ret double %12
 }
 
@@ -45,11 +45,11 @@ entry:
 	load i16* %4, align 2		; <i16>:5 [#uses=1]
 	uitofp i16 %5 to double		; <double>:6 [#uses=1]
 	tail call double @ldexp( double %6, i32 -32 ) nounwind		; <double>:7 [#uses=1]
-	add double %3, %7		; <double>:8 [#uses=1]
+	fadd double %3, %7		; <double>:8 [#uses=1]
 	getelementptr i16* %xseed, i32 2		; <i16*>:9 [#uses=1]
 	load i16* %9, align 2		; <i16>:10 [#uses=1]
 	uitofp i16 %10 to double		; <double>:11 [#uses=1]
 	tail call double @ldexp( double %11, i32 -16 ) nounwind		; <double>:12 [#uses=1]
-	add double %8, %12		; <double>:13 [#uses=1]
+	fadd double %8, %12		; <double>:13 [#uses=1]
 	ret double %13
 }
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index e2f06f5e69ae..1b3bde8fb12e 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -5,6 +5,6 @@ target triple = "powerpc-apple-darwin8.2.0"
 
 ; Dead argument should reserve an FP register.
 define double @bar(double %DEAD, double %X, double %Y) {
-        %tmp.2 = add double %X, %Y              ; <double> [#uses=1]
+        %tmp.2 = fadd double %X, %Y              ; <double> [#uses=1]
         ret double %tmp.2
 }
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
index a58cd162b4e8..7a65c00f104f 100644
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -9,15 +9,15 @@ define void @offset(%struct.Point* %pt, double %x, double %y, double %z) {
 entry:
         %tmp = getelementptr %struct.Point* %pt, i32 0, i32 0           ; <double*> [#uses=2]
         %tmp.upgrd.1 = load double* %tmp                ; <double> [#uses=1]
-        %tmp2 = add double %tmp.upgrd.1, %x             ; <double> [#uses=1]
+        %tmp2 = fadd double %tmp.upgrd.1, %x             ; <double> [#uses=1]
         store double %tmp2, double* %tmp
         %tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1          ; <double*> [#uses=2]
         %tmp7 = load double* %tmp6              ; <double> [#uses=1]
-        %tmp9 = add double %tmp7, %y            ; <double> [#uses=1]
+        %tmp9 = fadd double %tmp7, %y            ; <double> [#uses=1]
         store double %tmp9, double* %tmp6
         %tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2         ; <double*> [#uses=2]
         %tmp14 = load double* %tmp13            ; <double> [#uses=1]
-        %tmp16 = add double %tmp14, %z          ; <double> [#uses=1]
+        %tmp16 = fadd double %tmp14, %z          ; <double> [#uses=1]
         store double %tmp16, double* %tmp13
         ret void
 }
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 04ca3bbde1d3..637208b610a4 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -604,10 +604,10 @@ xPIF.exit:		; preds = %.critedge7898, %xOperationInitMasks.exit
 	shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:589 [#uses=1]
 	shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:590 [#uses=1]
 	shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:591 [#uses=1]
-	mul <4 x float> zeroinitializer, %589		; <<4 x float>>:592 [#uses=0]
-	mul <4 x float> zeroinitializer, %590		; <<4 x float>>:593 [#uses=0]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:594 [#uses=1]
-	mul <4 x float> zeroinitializer, %591		; <<4 x float>>:595 [#uses=0]
+	fmul <4 x float> zeroinitializer, %589		; <<4 x float>>:592 [#uses=0]
+	fmul <4 x float> zeroinitializer, %590		; <<4 x float>>:593 [#uses=0]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:594 [#uses=1]
+	fmul <4 x float> zeroinitializer, %591		; <<4 x float>>:595 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:596 [#uses=2]
 	load <4 x float>* %596		; <<4 x float>>:597 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %596
@@ -621,8 +621,8 @@ xPIF.exit:		; preds = %.critedge7898, %xOperationInitMasks.exit
 	load <4 x float>* null		; <<4 x float>>:604 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:605 [#uses=1]
 	load <4 x float>* %605		; <<4 x float>>:606 [#uses=1]
-	sub <4 x float> zeroinitializer, %604		; <<4 x float>>:607 [#uses=2]
-	sub <4 x float> zeroinitializer, %606		; <<4 x float>>:608 [#uses=2]
+	fsub <4 x float> zeroinitializer, %604		; <<4 x float>>:607 [#uses=2]
+	fsub <4 x float> zeroinitializer, %606		; <<4 x float>>:608 [#uses=2]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:609 [#uses=0]
 	br i1 false, label %617, label %610
 
@@ -672,21 +672,21 @@ xST.exit400:		; preds = %633, %625, %610
 	load <4 x float>* null		; <<4 x float>>:638 [#uses=2]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:639 [#uses=0]
 	load <4 x float>* null		; <<4 x float>>:640 [#uses=2]
-	mul <4 x float> %638, %638		; <<4 x float>>:641 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:642 [#uses=0]
-	mul <4 x float> %640, %640		; <<4 x float>>:643 [#uses=2]
+	fmul <4 x float> %638, %638		; <<4 x float>>:641 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:642 [#uses=0]
+	fmul <4 x float> %640, %640		; <<4 x float>>:643 [#uses=2]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>>:644 [#uses=0]
 	shufflevector <4 x float> %643, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>>:645 [#uses=1]
-	add <4 x float> %645, %643		; <<4 x float>>:646 [#uses=0]
+	fadd <4 x float> %645, %643		; <<4 x float>>:646 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>>:647 [#uses=1]
 	shufflevector <4 x float> %641, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>>:648 [#uses=1]
-	add <4 x float> zeroinitializer, %647		; <<4 x float>>:649 [#uses=2]
-	add <4 x float> zeroinitializer, %648		; <<4 x float>>:650 [#uses=0]
-	add <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:651 [#uses=2]
+	fadd <4 x float> zeroinitializer, %647		; <<4 x float>>:649 [#uses=2]
+	fadd <4 x float> zeroinitializer, %648		; <<4 x float>>:650 [#uses=0]
+	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:651 [#uses=2]
 	call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %649 )		; <<4 x float>>:652 [#uses=1]
-	mul <4 x float> %652, %649		; <<4 x float>>:653 [#uses=1]
+	fmul <4 x float> %652, %649		; <<4 x float>>:653 [#uses=1]
 	call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %651 )		; <<4 x float>>:654 [#uses=1]
-	mul <4 x float> %654, %651		; <<4 x float>>:655 [#uses=0]
+	fmul <4 x float> %654, %651		; <<4 x float>>:655 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:656 [#uses=1]
 	br i1 %656, label %665, label %657
 
@@ -721,9 +721,9 @@ xST.exit402:		; preds = %669, %657
 	load <4 x float>* null		; <<4 x float>>:676 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:677 [#uses=1]
 	shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:678 [#uses=1]
-	mul <4 x float> zeroinitializer, %677		; <<4 x float>>:679 [#uses=0]
-	mul <4 x float> zeroinitializer, %678		; <<4 x float>>:680 [#uses=0]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:681 [#uses=1]
+	fmul <4 x float> zeroinitializer, %677		; <<4 x float>>:679 [#uses=0]
+	fmul <4 x float> zeroinitializer, %678		; <<4 x float>>:680 [#uses=0]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:681 [#uses=1]
 	icmp eq i32 0, 0		; <i1>:682 [#uses=1]
 	br i1 %682, label %689, label %683
 
@@ -750,7 +750,7 @@ xST.exit405:		; preds = %689, %683
 	load <4 x float>* null		; <<4 x float>>:698 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2		; <<4 x float>*>:699 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:700 [#uses=1]
-	add <4 x float> zeroinitializer, %700		; <<4 x float>>:701 [#uses=0]
+	fadd <4 x float> zeroinitializer, %700		; <<4 x float>>:701 [#uses=0]
 	load <4 x i32>* %.sub7896		; <<4 x i32>>:702 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer )		; <i32>:703 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1		; <<4 x float>*>:704 [#uses=2]
@@ -769,7 +769,7 @@ xST.exit405:		; preds = %689, %683
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:714 [#uses=1]
 	load <4 x float>* %714		; <<4 x float>>:715 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:716 [#uses=0]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:717 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:717 [#uses=1]
 	load <4 x i32>* %.sub7896		; <<4 x i32>>:718 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0		; <<4 x float>*>:719 [#uses=1]
 	store <4 x float> zeroinitializer, <4 x float>* %719
@@ -791,10 +791,10 @@ xST.exit405:		; preds = %689, %683
 	load <4 x float>* %732		; <<4 x float>>:733 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:734 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:735 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:736 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:737 [#uses=1]
-	mul <4 x float> zeroinitializer, %735		; <<4 x float>>:738 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:739 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:736 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:737 [#uses=1]
+	fmul <4 x float> zeroinitializer, %735		; <<4 x float>>:738 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:739 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:740 [#uses=1]
 	icmp eq i32 %740, 0		; <i1>:741 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:742 [#uses=2]
@@ -821,9 +821,9 @@ xST.exit405:		; preds = %689, %683
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:761 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:762 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:763 [#uses=1]
-	add <4 x float> %757, zeroinitializer		; <<4 x float>>:764 [#uses=0]
-	add <4 x float> %758, %763		; <<4 x float>>:765 [#uses=0]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:766 [#uses=1]
+	fadd <4 x float> %757, zeroinitializer		; <<4 x float>>:764 [#uses=0]
+	fadd <4 x float> %758, %763		; <<4 x float>>:765 [#uses=0]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:766 [#uses=1]
 	br i1 false, label %773, label %767
 
 ; <label>:767		; preds = %xST.exit405
@@ -841,7 +841,7 @@ xST.exit405:		; preds = %689, %683
 xST.exit422:		; preds = %773, %767
 	%.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ]		; <<4 x float>> [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:774 [#uses=0]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:775 [#uses=0]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:775 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:776 [#uses=1]
 	br i1 %776, label %780, label %777
 
@@ -1295,7 +1295,7 @@ xST.exit469:		; preds = %1027, %1025, %1005
 	%.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ]		; <<4 x float>> [#uses=1]
 	load <4 x float>* null		; <<4 x float>>:1029 [#uses=0]
 	load <4 x float>* null		; <<4 x float>>:1030 [#uses=0]
-	sub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1031 [#uses=1]
+	fsub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1031 [#uses=1]
 	br i1 false, label %1037, label %1032
 
 ; <label>:1032		; preds = %xST.exit469
@@ -1368,8 +1368,8 @@ xST.exit472:		; preds = %1050, %1048, %1032
 
 xST.exit474:		; preds = %1059, %1058, %1051
 	load <4 x float>* null		; <<4 x float>>:1060 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1061 [#uses=1]
-	mul <4 x float> %1060, zeroinitializer		; <<4 x float>>:1062 [#uses=2]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1061 [#uses=1]
+	fmul <4 x float> %1060, zeroinitializer		; <<4 x float>>:1062 [#uses=2]
 	br i1 false, label %1065, label %1063
 
 ; <label>:1063		; preds = %xST.exit474
@@ -1556,8 +1556,8 @@ xST.exit489:		; preds = %1109, %1108, %1101
 
 xST.exit492:		; preds = %1118, %1117, %1110
 	load <4 x float>* null		; <<4 x float>>:1119 [#uses=1]
-	mul <4 x float> %1119, zeroinitializer		; <<4 x float>>:1120 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1121 [#uses=1]
+	fmul <4 x float> %1119, zeroinitializer		; <<4 x float>>:1120 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1121 [#uses=1]
 	br i1 false, label %1123, label %1122
 
 ; <label>:1122		; preds = %xST.exit492
@@ -1591,8 +1591,8 @@ xST.exit495:		; preds = %1130, %1129, %1122
 	%.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ]		; <<4 x float>> [#uses=1]
 	%.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ]		; <<4 x float>> [#uses=1]
 	load <4 x float>* null		; <<4 x float>>:1131 [#uses=1]
-	add <4 x float> %1131, zeroinitializer		; <<4 x float>>:1132 [#uses=1]
-	add <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1133 [#uses=1]
+	fadd <4 x float> %1131, zeroinitializer		; <<4 x float>>:1132 [#uses=1]
+	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1133 [#uses=1]
 	br i1 false, label %1135, label %1134
 
 ; <label>:1134		; preds = %xST.exit495
@@ -1633,10 +1633,10 @@ xST.exit498:		; preds = %1142, %1141, %1134
 	shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1148 [#uses=1]
 	shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1149 [#uses=1]
 	shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1150 [#uses=1]
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1151 [#uses=1]
-	mul <4 x float> zeroinitializer, %1148		; <<4 x float>>:1152 [#uses=1]
-	mul <4 x float> zeroinitializer, %1149		; <<4 x float>>:1153 [#uses=1]
-	mul <4 x float> zeroinitializer, %1150		; <<4 x float>>:1154 [#uses=1]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1151 [#uses=1]
+	fmul <4 x float> zeroinitializer, %1148		; <<4 x float>>:1152 [#uses=1]
+	fmul <4 x float> zeroinitializer, %1149		; <<4 x float>>:1153 [#uses=1]
+	fmul <4 x float> zeroinitializer, %1150		; <<4 x float>>:1154 [#uses=1]
 	br i1 false, label %1156, label %1155
 
 ; <label>:1155		; preds = %xST.exit498
@@ -1676,10 +1676,10 @@ xST.exit501:		; preds = %1163, %1162, %1155
 	load <4 x float>* %1165		; <<4 x float>>:1166 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:1167 [#uses=1]
 	load <4 x float>* %1167		; <<4 x float>>:1168 [#uses=1]
-	add <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1169 [#uses=1]
-	add <4 x float> zeroinitializer, %1164		; <<4 x float>>:1170 [#uses=1]
-	add <4 x float> zeroinitializer, %1166		; <<4 x float>>:1171 [#uses=1]
-	add <4 x float> zeroinitializer, %1168		; <<4 x float>>:1172 [#uses=1]
+	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1169 [#uses=1]
+	fadd <4 x float> zeroinitializer, %1164		; <<4 x float>>:1170 [#uses=1]
+	fadd <4 x float> zeroinitializer, %1166		; <<4 x float>>:1171 [#uses=1]
+	fadd <4 x float> zeroinitializer, %1168		; <<4 x float>>:1172 [#uses=1]
 	br i1 false, label %1174, label %1173
 
 ; <label>:1173		; preds = %xST.exit501
@@ -1714,7 +1714,7 @@ xST.exit504:		; preds = %1181, %1180, %1173
 	%.07726 = phi <4 x float> [ %1171, %1173 ], [ %.17727, %1181 ], [ %.17727, %1180 ]		; <<4 x float>> [#uses=1]
 	%.07730 = phi <4 x float> [ %1170, %1173 ], [ %.17731, %1181 ], [ %.17731, %1180 ]		; <<4 x float>> [#uses=1]
 	%.07734 = phi <4 x float> [ %1169, %1173 ], [ %.17735, %1181 ], [ %.17735, %1180 ]		; <<4 x float>> [#uses=1]
-	add <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1182 [#uses=1]
+	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1182 [#uses=1]
 	br i1 false, label %1184, label %1183
 
 ; <label>:1183		; preds = %xST.exit504
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index 5cccd315535b..aca0faaa4e41 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -9,8 +9,8 @@ entry:
         %input2 = load <4 x float>* null, align 16               ; <<4 x float>>
        	%shuffle7 = shufflevector <4 x float> %input2, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
 
-        %mul1 = mul <4 x float> %shuffle7, zeroinitializer              ; <<4 x
-        %add2 = add <4 x float> %mul1, %input2          ; <<4 x float>>
+        %mul1 = fmul <4 x float> %shuffle7, zeroinitializer              ; <<4 x
+        %add2 = fadd <4 x float> %mul1, %input2          ; <<4 x float>>
         store <4 x float> %add2, <4 x float>* null, align 16
         ret void
 }
diff --git a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
index 7d86434d24e2..f55ffac45b08 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -7,11 +7,11 @@ entry:
 	call ppc_fp128 @fabsl( ppc_fp128 %d ) nounwind readnone 		; <ppc_fp128>:0 [#uses=1]
 	fcmp olt ppc_fp128 0xM00000000000000000000000000000000, %0		; <i1>:1 [#uses=1]
 	%.pn106 = select i1 %1, ppc_fp128 %a, ppc_fp128 0xM00000000000000000000000000000000		; <ppc_fp128> [#uses=1]
-	%.pn = sub ppc_fp128 0xM00000000000000000000000000000000, %.pn106		; <ppc_fp128> [#uses=1]
+	%.pn = fsub ppc_fp128 0xM00000000000000000000000000000000, %.pn106		; <ppc_fp128> [#uses=1]
 	%y.0 = fdiv ppc_fp128 %.pn, 0xM00000000000000000000000000000000		; <ppc_fp128> [#uses=1]
-	mul ppc_fp128 %y.0, 0xM3FF00000000000000000000000000000		; <ppc_fp128>:2 [#uses=1]
-	add ppc_fp128 %2, mul (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000)		; <ppc_fp128>:3 [#uses=1]
-	%tmpi = add ppc_fp128 %3, 0xM00000000000000000000000000000000		; <ppc_fp128> [#uses=1]
+	fmul ppc_fp128 %y.0, 0xM3FF00000000000000000000000000000		; <ppc_fp128>:2 [#uses=1]
+	fadd ppc_fp128 %2, fmul (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000)		; <ppc_fp128>:3 [#uses=1]
+	%tmpi = fadd ppc_fp128 %3, 0xM00000000000000000000000000000000		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmpi, ppc_fp128* null, align 16
 	ret i256 0
 }
diff --git a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
index 54bb4b37f5e4..a7f8181fd906 100644
--- a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
+++ b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -7,7 +7,7 @@ entry:
 	br i1 false, label %bb3, label %bb4
 
 bb3:		; preds = %entry
-	sub ppc_fp128 0xM80000000000000000000000000000000, 0xM00000000000000000000000000000000		; <ppc_fp128>:0 [#uses=1]
+	fsub ppc_fp128 0xM80000000000000000000000000000000, 0xM00000000000000000000000000000000		; <ppc_fp128>:0 [#uses=1]
 	fptoui ppc_fp128 %0 to i32		; <i32>:1 [#uses=1]
 	zext i32 %1 to i64		; <i64>:2 [#uses=1]
 	sub i64 0, %2		; <i64>:3 [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index c181b1c97021..b625cebaca41 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -29,10 +29,10 @@ bb2217:		; preds = %bb2326
 	%10 = load float* %9, align 4		; <float> [#uses=1]
 	%11 = getelementptr float* null, i32 3		; <float*> [#uses=1]
 	%12 = load float* %11, align 4		; <float> [#uses=1]
-	%13 = mul float %10, 6.553500e+04		; <float> [#uses=1]
-	%14 = add float %13, 5.000000e-01		; <float> [#uses=1]
-	%15 = mul float %12, 6.553500e+04		; <float> [#uses=1]
-	%16 = add float %15, 5.000000e-01		; <float> [#uses=3]
+	%13 = fmul float %10, 6.553500e+04		; <float> [#uses=1]
+	%14 = fadd float %13, 5.000000e-01		; <float> [#uses=1]
+	%15 = fmul float %12, 6.553500e+04		; <float> [#uses=1]
+	%16 = fadd float %15, 5.000000e-01		; <float> [#uses=3]
 	%17 = fcmp olt float %14, 0.000000e+00		; <i1> [#uses=0]
 	%18 = fcmp olt float %16, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %18, label %bb2265, label %bb2262
@@ -68,10 +68,10 @@ bb2265:		; preds = %bb2264, %bb2262, %bb2217
 	%37 = load float* %36, align 4		; <float> [#uses=1]
 	%38 = getelementptr float* %36, i32 1		; <float*> [#uses=1]
 	%39 = load float* %38, align 4		; <float> [#uses=1]
-	%40 = mul float %37, 6.553500e+04		; <float> [#uses=1]
-	%41 = add float %40, 5.000000e-01		; <float> [#uses=1]
-	%42 = mul float %39, 6.553500e+04		; <float> [#uses=1]
-	%43 = add float %42, 5.000000e-01		; <float> [#uses=3]
+	%40 = fmul float %37, 6.553500e+04		; <float> [#uses=1]
+	%41 = fadd float %40, 5.000000e-01		; <float> [#uses=1]
+	%42 = fmul float %39, 6.553500e+04		; <float> [#uses=1]
+	%43 = fadd float %42, 5.000000e-01		; <float> [#uses=3]
 	%44 = fcmp olt float %41, 0.000000e+00		; <i1> [#uses=0]
 	%45 = fcmp olt float %43, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %45, label %bb2277, label %bb2274
@@ -88,10 +88,10 @@ bb2277:		; preds = %bb2274, %bb2265
 	%50 = load float* %49, align 4		; <float> [#uses=1]
 	%51 = getelementptr float* %36, i32 3		; <float*> [#uses=1]
 	%52 = load float* %51, align 4		; <float> [#uses=1]
-	%53 = mul float %50, 6.553500e+04		; <float> [#uses=1]
-	%54 = add float %53, 5.000000e-01		; <float> [#uses=1]
-	%55 = mul float %52, 6.553500e+04		; <float> [#uses=1]
-	%56 = add float %55, 5.000000e-01		; <float> [#uses=1]
+	%53 = fmul float %50, 6.553500e+04		; <float> [#uses=1]
+	%54 = fadd float %53, 5.000000e-01		; <float> [#uses=1]
+	%55 = fmul float %52, 6.553500e+04		; <float> [#uses=1]
+	%56 = fadd float %55, 5.000000e-01		; <float> [#uses=1]
 	%57 = fcmp olt float %54, 0.000000e+00		; <i1> [#uses=0]
 	%58 = fcmp olt float %56, 0.000000e+00		; <i1> [#uses=0]
 	%59 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
@@ -111,10 +111,10 @@ bb2277:		; preds = %bb2274, %bb2265
 	%73 = load float* %72, align 4		; <float> [#uses=1]
 	%74 = getelementptr float* %72, i32 1		; <float*> [#uses=1]
 	%75 = load float* %74, align 4		; <float> [#uses=1]
-	%76 = mul float %73, 6.553500e+04		; <float> [#uses=1]
-	%77 = add float %76, 5.000000e-01		; <float> [#uses=3]
-	%78 = mul float %75, 6.553500e+04		; <float> [#uses=1]
-	%79 = add float %78, 5.000000e-01		; <float> [#uses=1]
+	%76 = fmul float %73, 6.553500e+04		; <float> [#uses=1]
+	%77 = fadd float %76, 5.000000e-01		; <float> [#uses=3]
+	%78 = fmul float %75, 6.553500e+04		; <float> [#uses=1]
+	%79 = fadd float %78, 5.000000e-01		; <float> [#uses=1]
 	%80 = fcmp olt float %77, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %80, label %bb2295, label %bb2292
 
@@ -134,10 +134,10 @@ bb2295:		; preds = %bb2294, %bb2292, %bb2277
 	%86 = load float* %85, align 4		; <float> [#uses=1]
 	%87 = getelementptr float* %72, i32 3		; <float*> [#uses=1]
 	%88 = load float* %87, align 4		; <float> [#uses=1]
-	%89 = mul float %86, 6.553500e+04		; <float> [#uses=1]
-	%90 = add float %89, 5.000000e-01		; <float> [#uses=1]
-	%91 = mul float %88, 6.553500e+04		; <float> [#uses=1]
-	%92 = add float %91, 5.000000e-01		; <float> [#uses=1]
+	%89 = fmul float %86, 6.553500e+04		; <float> [#uses=1]
+	%90 = fadd float %89, 5.000000e-01		; <float> [#uses=1]
+	%91 = fmul float %88, 6.553500e+04		; <float> [#uses=1]
+	%92 = fadd float %91, 5.000000e-01		; <float> [#uses=1]
 	%93 = fcmp olt float %90, 0.000000e+00		; <i1> [#uses=0]
 	%94 = fcmp olt float %92, 0.000000e+00		; <i1> [#uses=0]
 	%95 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index 0283082e8c0f..c760b41b3047 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -3,9 +3,9 @@
 define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
 entry:
         %imag59 = load ppc_fp128* null, align 8         ; <ppc_fp128> [#uses=1]
-        %0 = mul ppc_fp128 0xM00000000000000000000000000000000, %imag59         ; <ppc_fp128> [#uses=1]
-        %1 = mul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000             ; <ppc_fp128> [#uses=1]
-        %2 = add ppc_fp128 %0, %1               ; <ppc_fp128> [#uses=1]
+        %0 = fmul ppc_fp128 0xM00000000000000000000000000000000, %imag59         ; <ppc_fp128> [#uses=1]
+        %1 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000             ; <ppc_fp128> [#uses=1]
+        %2 = fadd ppc_fp128 %0, %1               ; <ppc_fp128> [#uses=1]
         store ppc_fp128 %2, ppc_fp128* null, align 16
         unreachable
 }
diff --git a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 4db577357da1..071c78833bae 100644
--- a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -6,17 +6,17 @@ entry:
 	br i1 %0, label %bb5, label %bb1
 
 bb1:		; preds = %entry
-	%1 = mul ppc_fp128 %a, 0xM3DF00000000000000000000000000000		; <ppc_fp128> [#uses=1]
+	%1 = fmul ppc_fp128 %a, 0xM3DF00000000000000000000000000000		; <ppc_fp128> [#uses=1]
 	%2 = fptoui ppc_fp128 %1 to i32		; <i32> [#uses=1]
 	%3 = zext i32 %2 to i64		; <i64> [#uses=1]
 	%4 = shl i64 %3, 32		; <i64> [#uses=3]
 	%5 = uitofp i64 %4 to ppc_fp128		; <ppc_fp128> [#uses=1]
-	%6 = sub ppc_fp128 %a, %5		; <ppc_fp128> [#uses=3]
+	%6 = fsub ppc_fp128 %a, %5		; <ppc_fp128> [#uses=3]
 	%7 = fcmp olt ppc_fp128 %6, 0xM00000000000000000000000000000000		; <i1> [#uses=1]
 	br i1 %7, label %bb2, label %bb3
 
 bb2:		; preds = %bb1
-	%8 = sub ppc_fp128 0xM80000000000000000000000000000000, %6		; <ppc_fp128> [#uses=1]
+	%8 = fsub ppc_fp128 0xM80000000000000000000000000000000, %6		; <ppc_fp128> [#uses=1]
 	%9 = fptoui ppc_fp128 %8 to i32		; <i32> [#uses=1]
 	%10 = zext i32 %9 to i64		; <i64> [#uses=1]
 	%11 = sub i64 %4, %10		; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 66428c77e45a..20ff3dbc4f7b 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -11,7 +11,7 @@
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         %tmp = load <4 x float>* %P3            ; <<4 x float>> [#uses=1]
         %tmp3 = load <4 x float>* %P1           ; <<4 x float>> [#uses=1]
-        %tmp4 = mul <4 x float> %tmp, %tmp3             ; <<4 x float>> [#uses=1]
+        %tmp4 = fmul <4 x float> %tmp, %tmp3             ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp4, <4 x float>* %P3
         store <4 x float> zeroinitializer, <4 x float>* %P1
         store <4 x i32> zeroinitializer, <4 x i32>* %P2
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index fd9bd7400759..4a6fe70574f4 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -2,53 +2,53 @@
 ; RUN:   egrep {fn?madd|fn?msub} | count 8
 
 define double @test_FMADD1(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = add double %D, %C		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
 }
 
 define double @test_FMADD2(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = add double %D, %C		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
 }
 
 define double @test_FMSUB(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = sub double %D, %C		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fsub double %D, %C		; <double> [#uses=1]
 	ret double %E
 }
 
 define double @test_FNMADD1(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = add double %D, %C		; <double> [#uses=1]
-	%F = sub double -0.000000e+00, %E		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fadd double %D, %C		; <double> [#uses=1]
+	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
 }
 
 define double @test_FNMADD2(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = add double %C, %D		; <double> [#uses=1]
-	%F = sub double -0.000000e+00, %E		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fadd double %C, %D		; <double> [#uses=1]
+	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
 }
 
 define double @test_FNMSUB1(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = sub double %C, %D		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fsub double %C, %D		; <double> [#uses=1]
 	ret double %E
 }
 
 define double @test_FNMSUB2(double %A, double %B, double %C) {
-	%D = mul double %A, %B		; <double> [#uses=1]
-	%E = sub double %D, %C		; <double> [#uses=1]
-	%F = sub double -0.000000e+00, %E		; <double> [#uses=1]
+	%D = fmul double %A, %B		; <double> [#uses=1]
+	%E = fsub double %D, %C		; <double> [#uses=1]
+	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
 }
 
 define float @test_FNMSUBS(float %A, float %B, float %C) {
-	%D = mul float %A, %B		; <float> [#uses=1]
-	%E = sub float %D, %C		; <float> [#uses=1]
-	%F = sub float -0.000000e+00, %E		; <float> [#uses=1]
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
 	ret float %F
 }
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index b9517de28f9d..6c10dfbd44b0 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -4,7 +4,7 @@ declare double @fabs(double)
 
 define double @test(double %X) {
         %Y = call double @fabs( double %X )             ; <double> [#uses=1]
-        %Z = sub double -0.000000e+00, %Y               ; <double> [#uses=1]
+        %Z = fsub double -0.000000e+00, %Y               ; <double> [#uses=1]
         ret double %Z
 }
 
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
index a4f49f7625c7..9579a748e98e 100644
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -2,10 +2,10 @@
 
 define double @test1(double %a, double %b, double %c, double %d) {
 entry:
-        %tmp2 = sub double -0.000000e+00, %c            ; <double> [#uses=1]
-        %tmp4 = mul double %tmp2, %d            ; <double> [#uses=1]
-        %tmp7 = mul double %a, %b               ; <double> [#uses=1]
-        %tmp9 = sub double %tmp7, %tmp4         ; <double> [#uses=1]
+        %tmp2 = fsub double -0.000000e+00, %c            ; <double> [#uses=1]
+        %tmp4 = fmul double %tmp2, %d            ; <double> [#uses=1]
+        %tmp7 = fmul double %a, %b               ; <double> [#uses=1]
+        %tmp9 = fsub double %tmp7, %tmp4         ; <double> [#uses=1]
         ret double %tmp9
 }
 
diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll
index 3d6667539f24..583408c0eae2 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-1.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -3,7 +3,7 @@
 define i64 @__fixunstfdi(ppc_fp128 %a) nounwind  {
 entry:
 	%tmp1213 = uitofp i64 0 to ppc_fp128		; <ppc_fp128> [#uses=1]
-	%tmp15 = sub ppc_fp128 %a, %tmp1213		; <ppc_fp128> [#uses=1]
+	%tmp15 = fsub ppc_fp128 %a, %tmp1213		; <ppc_fp128> [#uses=1]
 	%tmp2829 = fptoui ppc_fp128 %tmp15 to i32		; <i32> [#uses=1]
 	%tmp282930 = zext i32 %tmp2829 to i64		; <i64> [#uses=1]
 	%tmp32 = add i64 %tmp282930, 0		; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/itofp128.ll b/test/CodeGen/PowerPC/itofp128.ll
index 91119e9e8ef5..4d745111b04b 100644
--- a/test/CodeGen/PowerPC/itofp128.ll
+++ b/test/CodeGen/PowerPC/itofp128.ll
@@ -6,7 +6,7 @@ target triple = "powerpc64-apple-darwin9.2.0"
 define i128 @__fixunstfti(ppc_fp128 %a) nounwind  {
 entry:
         %tmp1213 = uitofp i128 0 to ppc_fp128           ; <ppc_fp128> [#uses=1]
-        %tmp15 = sub ppc_fp128 %a, %tmp1213             ; <ppc_fp128> [#uses=1]
+        %tmp15 = fsub ppc_fp128 %a, %tmp1213             ; <ppc_fp128> [#uses=1]
         %tmp2829 = fptoui ppc_fp128 %tmp15 to i64               ; <i64> [#uses=1]
         %tmp282930 = zext i64 %tmp2829 to i128          ; <i128> [#uses=1]
         %tmp32 = add i128 %tmp282930, 0         ; <i128> [#uses=1]
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index d5484bd4b78d..fd0e1d4a2ea8 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -9,9 +9,9 @@ define void @func(<4 x float>* %a, <4 x float>* %b) {
         %tmp = load <4 x float>* %tmp1          ; <<4 x float>> [#uses=1]
         %tmp3 = getelementptr <4 x float>* %a, i32 1            ; <<4 x float>*> [#uses=1]
         %tmp4 = load <4 x float>* %tmp3         ; <<4 x float>> [#uses=1]
-        %tmp5 = mul <4 x float> %tmp, %tmp4             ; <<4 x float>> [#uses=1]
+        %tmp5 = fmul <4 x float> %tmp, %tmp4             ; <<4 x float>> [#uses=1]
         %tmp8 = load <4 x float>* %b            ; <<4 x float>> [#uses=1]
-        %tmp9 = add <4 x float> %tmp5, %tmp8            ; <<4 x float>> [#uses=1]
+        %tmp9 = fadd <4 x float> %tmp5, %tmp8            ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp9, <4 x float>* %a
         ret void
 }
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
index b72b148259dc..3f75f7d28ed6 100644
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ b/test/CodeGen/PowerPC/multiple-return-values.ll
@@ -3,7 +3,7 @@
 
 define {i64, float} @bar(i64 %a, float %b) {
         %y = add i64 %a, 7
-        %z = add float %b, 7.0
+        %z = fadd float %b, 7.0
 	ret i64 %y, float %z
 }
 
diff --git a/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
index 5c059b4ea291..e3c5ab122545 100644
--- a/test/CodeGen/PowerPC/ppcf128-1-opt.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -5,19 +5,19 @@ target triple = "powerpc-apple-darwin8"
 
 define ppc_fp128 @plus(ppc_fp128 %x, ppc_fp128 %y) {
 entry:
-	%tmp3 = add ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
+	%tmp3 = fadd ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %tmp3
 }
 
 define ppc_fp128 @minus(ppc_fp128 %x, ppc_fp128 %y) {
 entry:
-	%tmp3 = sub ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
+	%tmp3 = fsub ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %tmp3
 }
 
 define ppc_fp128 @times(ppc_fp128 %x, ppc_fp128 %y) {
 entry:
-	%tmp3 = mul ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
+	%tmp3 = fmul ppc_fp128 %x, %y		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %tmp3
 }
 
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index ea8dd37c97d8..a487de7fd577 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -14,7 +14,7 @@ entry:
 	store ppc_fp128 %y, ppc_fp128* %y_addr
 	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp3 = add ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
+	%tmp3 = fadd ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
 	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
@@ -36,7 +36,7 @@ entry:
 	store ppc_fp128 %y, ppc_fp128* %y_addr
 	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp3 = sub ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
+	%tmp3 = fsub ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
 	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
@@ -58,7 +58,7 @@ entry:
 	store ppc_fp128 %y, ppc_fp128* %y_addr
 	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp3 = mul ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
+	%tmp3 = fmul ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
 	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
diff --git a/test/CodeGen/PowerPC/ppcf128-2.ll b/test/CodeGen/PowerPC/ppcf128-2.ll
index b4f61f89fb64..43182266e731 100644
--- a/test/CodeGen/PowerPC/ppcf128-2.ll
+++ b/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -4,7 +4,7 @@ define i64 @__fixtfdi(ppc_fp128 %a) nounwind  {
 entry:
         br i1 false, label %bb, label %bb8
 bb:             ; preds = %entry
-        %tmp5 = sub ppc_fp128 0xM80000000000000000000000000000000, %a           ; <ppc_fp128> [#uses=1]
+        %tmp5 = fsub ppc_fp128 0xM80000000000000000000000000000000, %a           ; <ppc_fp128> [#uses=1]
         %tmp6 = tail call i64 @__fixunstfdi( ppc_fp128 %tmp5 ) nounwind                 ; <i64> [#uses=0]
         ret i64 0
 bb8:            ; preds = %entry
diff --git a/test/CodeGen/PowerPC/ppcf128-4.ll b/test/CodeGen/PowerPC/ppcf128-4.ll
index 8921dfcd5a0d..16d61780a46c 100644
--- a/test/CodeGen/PowerPC/ppcf128-4.ll
+++ b/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -2,9 +2,9 @@
 
 define ppc_fp128 @__floatditf(i64 %u) nounwind  {
 entry:
-        %tmp6 = mul ppc_fp128 0xM00000000000000000000000000000000, 0xM41F00000000000000000000000000000
+        %tmp6 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM41F00000000000000000000000000000
         %tmp78 = trunc i64 %u to i32
         %tmp789 = uitofp i32 %tmp78 to ppc_fp128
-        %tmp11 = add ppc_fp128 %tmp789, %tmp6
+        %tmp11 = fadd ppc_fp128 %tmp789, %tmp6
         ret ppc_fp128 %tmp11
 }
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index 6e68ee3c5203..27a5004bd12a 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -14,7 +14,7 @@ entry:
 	br i1 %toBool, label %bb, label %bb8
 bb:		; preds = %entry
 	%tmp4 = load float* %a_addr, align 4		; <float> [#uses=1]
-	%tmp5 = sub float -0.000000e+00, %tmp4		; <float> [#uses=1]
+	%tmp5 = fsub float -0.000000e+00, %tmp4		; <float> [#uses=1]
 	%tmp6 = call i128 @__fixunssfDI( float %tmp5 ) nounwind 		; <i128> [#uses=1]
 	%tmp7 = sub i128 0, %tmp6		; <i128> [#uses=1]
 	store i128 %tmp7, i128* %tmp, align 16
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index 3d52d0c9e7ed..d211b3b76f52 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -3,8 +3,8 @@
 ; RUN:   grep fmul | count 1
 
 define double @foo(double %X) {
-        %tmp1 = mul double %X, 1.23
-        %tmp2 = mul double %tmp1, 4.124
+        %tmp1 = fmul double %X, 1.23
+        %tmp2 = fmul double %tmp1, 4.124
         ret double %tmp2
 }
 
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index 2ef2099ddb97..9fdbffd33ed5 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -2,7 +2,7 @@
 
 define void @t(<4 x float>* %A) {
 	%tmp2 = load <4 x float>* %A
-	%tmp3 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
+	%tmp3 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
 	store <4 x float> %tmp3, <4 x float>* %A
 	ret void
 }
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index a63113700038..7b7e4fe33477 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -15,7 +15,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
         %q = load %f4* %Q               ; <%f4> [#uses=1]
-        %R = add %f4 %q, %tmp6          ; <%f4> [#uses=1]
+        %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
 }
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index 8d06a7d94dac..7350e91b7741 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -2,7 +2,7 @@
 
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
-        %S = add <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
+        %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
         store <4 x float> %S, <4 x float>* %P
         ret void
 }
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index 679e69effa3b..a6c17b4bccf6 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -14,7 +14,7 @@
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
         %p = load %f1* %P               ; <%f1> [#uses=1]
         %q = load %f1* %Q               ; <%f1> [#uses=1]
-        %R = add %f1 %p, %q             ; <%f1> [#uses=1]
+        %R = fadd %f1 %p, %q             ; <%f1> [#uses=1]
         store %f1 %R, %f1* %S
         ret void
 }
@@ -22,7 +22,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
         %p = load %f2* %P               ; <%f2> [#uses=1]
         %q = load %f2* %Q               ; <%f2> [#uses=1]
-        %R = add %f2 %p, %q             ; <%f2> [#uses=1]
+        %R = fadd %f2 %p, %q             ; <%f2> [#uses=1]
         store %f2 %R, %f2* %S
         ret void
 }
@@ -30,7 +30,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
         %q = load %f4* %Q               ; <%f4> [#uses=1]
-        %R = add %f4 %p, %q             ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, %q             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
@@ -38,7 +38,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
         %p = load %f8* %P               ; <%f8> [#uses=1]
         %q = load %f8* %Q               ; <%f8> [#uses=1]
-        %R = add %f8 %p, %q             ; <%f8> [#uses=1]
+        %R = fadd %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
@@ -46,7 +46,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
         %p = load %f8* %P               ; <%f8> [#uses=1]
         %q = load %f8* %Q               ; <%f8> [#uses=1]
-        %R = mul %f8 %p, %q             ; <%f8> [#uses=1]
+        %R = fmul %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
@@ -63,7 +63,7 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
 
 define void @test_cst(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
+        %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
  2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
@@ -71,14 +71,14 @@ define void @test_cst(%f4* %P, %f4* %S) {
 
 define void @test_zero(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, undef          ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, undef          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
@@ -116,7 +116,7 @@ define double @test_extract_elt2(%d8* %P) {
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
         %tmp = load %f4* %b             ; <%f4> [#uses=1]
-        %tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 
+        %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
         %tmp3 = bitcast %f4 %tmp2 to %i4                ; <%i4> [#uses=1]
         %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >           
@@ -140,7 +140,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2    
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3   
         %q = load %f4* %Q               ; <%f4> [#uses=1]
-        %R = add %f4 %q, %tmp6          ; <%f4> [#uses=1]
+        %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
 }
diff --git a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
index 15af046618f6..76f140ceaf85 100644
--- a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
+++ b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -2,8 +2,8 @@
 
 define void @execute_list() {
         %tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00               ; <float> [#uses=1]
-        %tmp.37.i = mul float 0.000000e+00, %tmp.33.i           ; <float> [#uses=1]
-        %tmp.42.i = add float %tmp.37.i, 0.000000e+00           ; <float> [#uses=1]
+        %tmp.37.i = fmul float 0.000000e+00, %tmp.33.i           ; <float> [#uses=1]
+        %tmp.42.i = fadd float %tmp.37.i, 0.000000e+00           ; <float> [#uses=1]
         call void @gl_EvalCoord1f( float %tmp.42.i )
         ret void
 }
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
index b5d215b2057a..04035aca998f 100644
--- a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -13,7 +13,7 @@ no_exit.16:             ; preds = %no_exit.16, %no_exit.16.preheader
 loopexit.16.loopexit:           ; preds = %no_exit.16
         br label %no_exit.18
 no_exit.18:             ; preds = %loopexit.20, %loopexit.16.loopexit
-        %tmp.882 = add float 0.000000e+00, 0.000000e+00         ; <float> [#uses=2]
+        %tmp.882 = fadd float 0.000000e+00, 0.000000e+00         ; <float> [#uses=2]
         br i1 false, label %loopexit.19, label %no_exit.19.preheader
 no_exit.19.preheader:           ; preds = %no_exit.18
         ret void
@@ -21,9 +21,9 @@ loopexit.19:            ; preds = %no_exit.18
         br i1 false, label %loopexit.20, label %no_exit.20
 no_exit.20:             ; preds = %loopexit.21, %loopexit.19
         %ai2.1122.tmp.3 = phi float [ %tmp.958, %loopexit.21 ], [ %tmp.882, %loopexit.19 ]              ; <float> [#uses=1]
-        %tmp.950 = mul float %tmp.882, %ai2.1122.tmp.3          ; <float> [#uses=1]
-        %tmp.951 = sub float 0.000000e+00, %tmp.950             ; <float> [#uses=1]
-        %tmp.958 = add float 0.000000e+00, 0.000000e+00         ; <float> [#uses=1]
+        %tmp.950 = fmul float %tmp.882, %ai2.1122.tmp.3          ; <float> [#uses=1]
+        %tmp.951 = fsub float 0.000000e+00, %tmp.950             ; <float> [#uses=1]
+        %tmp.958 = fadd float 0.000000e+00, 0.000000e+00         ; <float> [#uses=1]
         br i1 false, label %loopexit.21, label %no_exit.21.preheader
 no_exit.21.preheader:           ; preds = %no_exit.20
         ret void
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index c9a004965fd7..78838d1141a4 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -11,7 +11,7 @@ cond_next33:		; preds = %0
 	%tmp58.i = or i32 0, %tmp61.i.upgrd.1		; <i32> [#uses=1]
 	%tmp62.i = or i32 %tmp58.i, 0		; <i32> [#uses=1]
 	%tmp62.i.upgrd.2 = sitofp i32 %tmp62.i to double		; <double> [#uses=1]
-	%tmp64.i = add double %tmp62.i.upgrd.2, %tmp44.i		; <double> [#uses=1]
+	%tmp64.i = fadd double %tmp62.i.upgrd.2, %tmp44.i		; <double> [#uses=1]
 	%tmp68.i = call double @foo( double %tmp64.i, i32 0 )		; <double> [#uses=0]
 	ret i32 0
 }
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 811e9ac6d182..3b365f35cb22 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -3,12 +3,12 @@
 ; RUN:   %prcontext {mulss	LCPI1_3} 1 | grep mulss | count 1
 
 define float @foo(float %x) {
-    %tmp1 = mul float %x, 3.000000e+00
-    %tmp3 = mul float %x, 5.000000e+00
-    %tmp5 = mul float %x, 7.000000e+00
-    %tmp7 = mul float %x, 1.100000e+01
-    %tmp10 = add float %tmp1, %tmp3
-    %tmp12 = add float %tmp10, %tmp5
-    %tmp14 = add float %tmp12, %tmp7
+    %tmp1 = fmul float %x, 3.000000e+00
+    %tmp3 = fmul float %x, 5.000000e+00
+    %tmp5 = fmul float %x, 7.000000e+00
+    %tmp7 = fmul float %x, 1.100000e+01
+    %tmp10 = fadd float %tmp1, %tmp3
+    %tmp12 = fadd float %tmp10, %tmp5
+    %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
 }
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index d1d0ea845781..c03d982aeb15 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -139,7 +139,7 @@ b341:
 	%r353 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
 	%r354 = load float* %r353
 	%r362 = load float* bitcast ([128 x i64]* @i6000 to float*)
-	%r363 = add float 0.000000e+00, %r362
+	%r363 = fadd float 0.000000e+00, %r362
 	%r370 = load float* bitcast ([128 x i64]* @i6000 to float*)
 	%r376 = icmp slt i64 %r16, 0
 	br i1 %r376, label %b377, label %a35b
@@ -155,11 +155,11 @@ a35b:
 	%e785 = shl i64 %w1865, 0
 	%b1877 = mul i64 %w1865, 0
 	%s795 = add i64 %b1877, 0
-	%r399 = add float %r354, 0.000000e+00
-	%r402 = add float %r370, 0.000000e+00
-	%r403 = add float %r348, 0.000000e+00
+	%r399 = fadd float %r354, 0.000000e+00
+	%r402 = fadd float %r370, 0.000000e+00
+	%r403 = fadd float %r348, 0.000000e+00
 	%r411 = add i64 %s795, 0
-	%r431 = add float %r362, 0.000000e+00
+	%r431 = fadd float %r362, 0.000000e+00
 	%r454 = add i64 %e785, 0
 	%r457 = add i64 %e785, 0
 	%r459 = icmp slt i64 %r457, 0
@@ -230,21 +230,21 @@ a45b714:
 	%r750 = add i64 %r717, 0
 	%r751 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
 	%r752 = load float* %r751
-	%r753 = add float %r752, %r746
-	%r754 = add float %r728, %r722
-	%r755 = add float %r734, %r754
-	%r756 = add float %r755, %r740
-	%r757 = add float %r753, %r756
-	%r759 = add float %r757, %r540
+	%r753 = fadd float %r752, %r746
+	%r754 = fadd float %r728, %r722
+	%r755 = fadd float %r734, %r754
+	%r756 = fadd float %r755, %r740
+	%r757 = fadd float %r753, %r756
+	%r759 = fadd float %r757, %r540
 	%r770 = add i64 %r717, 0
 	%r771 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
 	%r772 = load float* %r771
 	%r776 = add i64 %r717, 0
 	%r777 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
 	%r778 = load float* %r777
-	%r781 = add float %r363, %r772
-	%r782 = add float %r781, %r778
-	%r783 = add float %r551, %r782
+	%r781 = fadd float %r363, %r772
+	%r782 = fadd float %r781, %r778
+	%r783 = fadd float %r551, %r782
 	br label %b712
 a57b:
 	br i1 %r335, label %a66b, label %b1086
@@ -310,10 +310,10 @@ a53b1019:
 	%r1035 = load float* %r1034
 	%r1037 = bitcast i8* %c22010 to float*
 	%r1040 = getelementptr float* %r1037, i64 %r1025
-	%r1044 = add float %r864, %r1035
-	%r1046 = add float %r870, %r1027
-	%r1047 = add float %r1044, %r1046
-	%r1048 = add float %r851, %r1047
+	%r1044 = fadd float %r864, %r1035
+	%r1046 = fadd float %r870, %r1027
+	%r1047 = fadd float %r1044, %r1046
+	%r1048 = fadd float %r851, %r1047
 	%v1886 = add i64 %w1885, 0
 	%u1890 = icmp slt i64 %v1886, %b1889
 	br i1 %u1890, label %b1016, label %a53b1019
@@ -341,7 +341,7 @@ b1117:
 	%r1132 = bitcast i8* %c22012 to float*
 	%r1134 = getelementptr float* %r1132, i64 %w1915
 	%r1135 = load float* %r1134
-	%r1136 = add float %r1123, %r1135
+	%r1136 = fadd float %r1123, %r1135
 	%r1138 = icmp slt i64 %r1114, 0
 	br i1 %r1138, label %b1139, label %a63b
 b1139:
@@ -387,7 +387,7 @@ b1263:
 a63b1266:
 	%w1944 = phi i64 [ 0, %a63b1266q ], [ %v1945, %a63b1266 ]
 	%s1377 = phi i64 [ %s1374, %a63b1266q ], [ %r1297, %a63b1266 ]
-	%r1282 = add float %r1136, 0.000000e+00
+	%r1282 = fadd float %r1136, 0.000000e+00
 	%r1297 = add i64 %s1377, 0
 	%v1945 = add i64 %w1944, 0
 	%u1949 = icmp slt i64 %v1945, %b1948
@@ -418,7 +418,7 @@ a74b:
 	%r1379 = add i64 %s1543, 0
 	%r1403 = add i64 %r1355, 0
 	%r1422 = add i64 %r1348, 0
-	%r1526 = add float %r1372, 0.000000e+00
+	%r1526 = fadd float %r1372, 0.000000e+00
 	%r1573 = add i64 %w1958, 0
 	%r1581 = icmp slt i64 %r1573, 0
 	%v1959 = add i64 %w1958, 0
@@ -448,10 +448,10 @@ a97b:
 	%r1763 = load float* %r1762
 	%r1767 = add i64 %r1622, 0
 	%r1768 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
-	%r1772 = add float %r1763, 0.000000e+00
-	%r1773 = add float %r1772, 0.000000e+00
-	%r1809 = add float %r1757, 0.000000e+00
-	%r1810 = add float %r1773, %r1809
+	%r1772 = fadd float %r1763, 0.000000e+00
+	%r1773 = fadd float %r1772, 0.000000e+00
+	%r1809 = fadd float %r1757, 0.000000e+00
+	%r1810 = fadd float %r1773, %r1809
 	store float %r1810, float* %r1768
 	%r1818 = add i64 %w1970, 0
 	%r1826 = icmp slt i64 %r1818, 0
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index d4176f1d78cb..721b6e7e2094 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -3,12 +3,12 @@
 
 define void @test() nounwind {
 test.exit:
-	mul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:0 [#uses=4]
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:0 [#uses=4]
 	load <4 x float>* null		; <<4 x float>>:1 [#uses=1]
 	shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:2 [#uses=1]
-	mul <4 x float> %0, %2		; <<4 x float>>:3 [#uses=1]
-	sub <4 x float> zeroinitializer, %3		; <<4 x float>>:4 [#uses=1]
-	mul <4 x float> %4, zeroinitializer		; <<4 x float>>:5 [#uses=2]
+	fmul <4 x float> %0, %2		; <<4 x float>>:3 [#uses=1]
+	fsub <4 x float> zeroinitializer, %3		; <<4 x float>>:4 [#uses=1]
+	fmul <4 x float> %4, zeroinitializer		; <<4 x float>>:5 [#uses=2]
 	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:6 [#uses=1]
 	and <4 x i32> %6, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 >		; <<4 x i32>>:7 [#uses=1]
 	bitcast <4 x i32> %7 to <4 x float>		; <<4 x float>>:8 [#uses=2]
@@ -23,13 +23,13 @@ test.exit:
 	br i1 false, label %19, label %13
 
 ; <label>:13		; preds = %12
-	sub float -0.000000e+00, 0.000000e+00		; <float>:14 [#uses=1]
+	fsub float -0.000000e+00, 0.000000e+00		; <float>:14 [#uses=1]
 	%tmp207 = extractelement <4 x float> zeroinitializer, i32 0		; <float> [#uses=1]
 	%tmp208 = extractelement <4 x float> zeroinitializer, i32 2		; <float> [#uses=1]
-	sub float -0.000000e+00, %tmp208		; <float>:15 [#uses=1]
+	fsub float -0.000000e+00, %tmp208		; <float>:15 [#uses=1]
 	%tmp155 = extractelement <4 x float> zeroinitializer, i32 0		; <float> [#uses=1]
 	%tmp156 = extractelement <4 x float> zeroinitializer, i32 2		; <float> [#uses=1]
-	sub float -0.000000e+00, %tmp156		; <float>:16 [#uses=1]
+	fsub float -0.000000e+00, %tmp156		; <float>:16 [#uses=1]
 	br label %19
 
 ; <label>:17		; preds = %11
@@ -54,7 +54,7 @@ test.exit:
 	insertelement <4 x float> %31, float %25, i32 2		; <<4 x float>>:32 [#uses=1]
 	insertelement <4 x float> %32, float %25, i32 3		; <<4 x float>>:33 [#uses=1]
 	fdiv <4 x float> %33, zeroinitializer		; <<4 x float>>:34 [#uses=1]
-	mul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:35 [#uses=1]
+	fmul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:35 [#uses=1]
 	insertelement <4 x float> undef, float %22, i32 0		; <<4 x float>>:36 [#uses=1]
 	insertelement <4 x float> %36, float %21, i32 1		; <<4 x float>>:37 [#uses=0]
 	br i1 false, label %foo.exit, label %38
@@ -64,17 +64,17 @@ test.exit:
 	fcmp ogt float %39, 0.000000e+00		; <i1>:40 [#uses=1]
 	extractelement <4 x float> %0, i32 2		; <float>:41 [#uses=1]
 	extractelement <4 x float> %0, i32 1		; <float>:42 [#uses=1]
-	sub float -0.000000e+00, %42		; <float>:43 [#uses=2]
+	fsub float -0.000000e+00, %42		; <float>:43 [#uses=2]
 	%tmp189 = extractelement <4 x float> %5, i32 2		; <float> [#uses=1]
 	br i1 %40, label %44, label %46
 
 ; <label>:44		; preds = %38
-	sub float -0.000000e+00, %tmp189		; <float>:45 [#uses=0]
+	fsub float -0.000000e+00, %tmp189		; <float>:45 [#uses=0]
 	br label %foo.exit
 
 ; <label>:46		; preds = %38
 	%tmp192 = extractelement <4 x float> %5, i32 1		; <float> [#uses=1]
-	sub float -0.000000e+00, %tmp192		; <float>:47 [#uses=1]
+	fsub float -0.000000e+00, %tmp192		; <float>:47 [#uses=1]
 	br label %foo.exit
 
 foo.exit:		; preds = %46, %44, %19
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
index ed5a1943a262..514d6656cd2a 100644
--- a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -11,7 +11,7 @@ bb:		; preds = %bb, %cond_true10
 	%tmp52 = bitcast <4 x float> %tmp49 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp53 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp52, <4 x i32> < i32 8, i32 undef, i32 undef, i32 undef > )		; <<4 x i32>> [#uses=1]
 	%tmp105 = bitcast <4 x i32> %tmp53 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp108 = sub <4 x float> zeroinitializer, %tmp105		; <<4 x float>> [#uses=0]
+	%tmp108 = fsub <4 x float> zeroinitializer, %tmp105		; <<4 x float>> [#uses=0]
 	br label %bb
 
 return:		; preds = %entry
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
index ce23da0b535f..3e08e50f09de 100644
--- a/test/CodeGen/X86/2007-04-24-VectorCrash.ll
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -8,8 +8,8 @@ define void @test(float* %P) {
 entry:
 	or <4 x i32> zeroinitializer, and (<4 x i32> bitcast (<4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer) to <4 x i32>), <4 x i32> < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >)		; <<4 x i32>>:0 [#uses=1]
 	bitcast <4 x i32> %0 to <4 x float>		; <<4 x float>>:1 [#uses=1]
-	sub <4 x float> %1, zeroinitializer		; <<4 x float>>:2 [#uses=1]
-	sub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2		; <<4 x float>>:3 [#uses=1]
+	fsub <4 x float> %1, zeroinitializer		; <<4 x float>>:2 [#uses=1]
+	fsub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2		; <<4 x float>>:3 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %3, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:4 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %4, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:5 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %5, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:6 [#uses=1]
@@ -29,19 +29,19 @@ entry:
 	shufflevector <4 x float> zeroinitializer, <4 x float> %19, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:20 [#uses=1]
 	shufflevector <4 x float> %20, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:21 [#uses=1]
 	shufflevector <4 x float> %21, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:22 [#uses=1]
-	mul <4 x float> %22, zeroinitializer		; <<4 x float>>:23 [#uses=1]
+	fmul <4 x float> %22, zeroinitializer		; <<4 x float>>:23 [#uses=1]
 	shufflevector <4 x float> %23, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>>:24 [#uses=1]
 	call <4 x float> @llvm.x86.sse.add.ss( <4 x float> zeroinitializer, <4 x float> %24 )		; <<4 x float>>:25 [#uses=1]
 	shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:26 [#uses=1]
 	shufflevector <4 x float> %26, <4 x float> zeroinitializer, <4 x i32> zeroinitializer		; <<4 x float>>:27 [#uses=1]
 	shufflevector <4 x float> %27, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:28 [#uses=1]
-	mul <4 x float> zeroinitializer, %28		; <<4 x float>>:29 [#uses=1]
-	add <4 x float> %29, zeroinitializer		; <<4 x float>>:30 [#uses=1]
-	mul <4 x float> zeroinitializer, %30		; <<4 x float>>:31 [#uses=1]
+	fmul <4 x float> zeroinitializer, %28		; <<4 x float>>:29 [#uses=1]
+	fadd <4 x float> %29, zeroinitializer		; <<4 x float>>:30 [#uses=1]
+	fmul <4 x float> zeroinitializer, %30		; <<4 x float>>:31 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %31, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:32 [#uses=1]
-	mul <4 x float> zeroinitializer, %32		; <<4 x float>>:33 [#uses=1]
+	fmul <4 x float> zeroinitializer, %32		; <<4 x float>>:33 [#uses=1]
 	shufflevector <4 x float> %33, <4 x float> zeroinitializer, <4 x i32> zeroinitializer		; <<4 x float>>:34 [#uses=1]
-	mul <4 x float> zeroinitializer, %34		; <<4 x float>>:35 [#uses=1]
+	fmul <4 x float> zeroinitializer, %34		; <<4 x float>>:35 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %35, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >		; <<4 x float>>:36 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %36, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:37 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %37, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:38 [#uses=1]
@@ -56,7 +56,7 @@ entry:
 	shufflevector <4 x float> zeroinitializer, <4 x float> %46, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:47 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %47, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:48 [#uses=1]
 	shufflevector <4 x float> %48, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>>:49 [#uses=1]
-	add <4 x float> %49, zeroinitializer		; <<4 x float>>:50 [#uses=1]
+	fadd <4 x float> %49, zeroinitializer		; <<4 x float>>:50 [#uses=1]
 	%tmp5845 = extractelement <4 x float> %50, i32 2		; <float> [#uses=1]
 	store float %tmp5845, float* %P
 	ret void
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index 11fb8e3f2c84..66a58c73e824 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -2,10 +2,10 @@
 
 define void @test(<4 x float>* %arg) {
 	%tmp89 = getelementptr <4 x float>* %arg, i64 3
-	%tmp1144 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
+	%tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
 	store <4 x float> %tmp1144, <4 x float>* null
 	%tmp1149 = load <4 x float>* %tmp89
-	%tmp1150 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
+	%tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
 	store <4 x float> %tmp1150, <4 x float>* %tmp89
 	ret void
 }
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index 120284f80c5c..7f09b5275a05 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -27,7 +27,7 @@ bb383:		; preds = %bb164
 
 cond_true425:		; preds = %bb383
 	%tmp430 = load float* null		; <float> [#uses=1]
-	%tmp432 = sub float %tmp430, %tmp408		; <float> [#uses=1]
+	%tmp432 = fsub float %tmp430, %tmp408		; <float> [#uses=1]
 	%tmp432433 = fpext float %tmp432 to double		; <double> [#uses=1]
 	%tmp434435 = fpext float %tmp408 to double		; <double> [#uses=1]
 	call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 142bcd334787..835e4caf0aaf 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -22,7 +22,7 @@ entry:
 
 	%tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
 	%tmp84.i1413.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
-	%tmp89.i1415.i = mul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i		; <<4 x float>> [#uses=1]
+	%tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
         ret i16 0
 }
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index 3a3c11377380..fd914a1687b7 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -5,38 +5,38 @@ entry:
 	br i1 true, label %bb171.preheader, label %bb431
 
 bb171.preheader:		; preds = %entry
-	%tmp176 = add float 0.000000e+00, 1.000000e+00		; <float> [#uses=2]
+	%tmp176 = fadd float 0.000000e+00, 1.000000e+00		; <float> [#uses=2]
 	%gi.1 = getelementptr float* %fz, i32 0		; <float*> [#uses=2]
 	%tmp240 = load float* %gi.1, align 4		; <float> [#uses=1]
-	%tmp242 = sub float %tmp240, 0.000000e+00		; <float> [#uses=2]
+	%tmp242 = fsub float %tmp240, 0.000000e+00		; <float> [#uses=2]
 	%tmp251 = getelementptr float* %fz, i32 0		; <float*> [#uses=1]
 	%tmp252 = load float* %tmp251, align 4		; <float> [#uses=1]
 	%tmp258 = getelementptr float* %fz, i32 0		; <float*> [#uses=2]
 	%tmp259 = load float* %tmp258, align 4		; <float> [#uses=2]
-	%tmp261 = mul float %tmp259, %tmp176		; <float> [#uses=1]
-	%tmp262 = sub float 0.000000e+00, %tmp261		; <float> [#uses=2]
-	%tmp269 = mul float %tmp252, %tmp176		; <float> [#uses=1]
-	%tmp276 = mul float %tmp259, 0.000000e+00		; <float> [#uses=1]
-	%tmp277 = add float %tmp269, %tmp276		; <float> [#uses=2]
+	%tmp261 = fmul float %tmp259, %tmp176		; <float> [#uses=1]
+	%tmp262 = fsub float 0.000000e+00, %tmp261		; <float> [#uses=2]
+	%tmp269 = fmul float %tmp252, %tmp176		; <float> [#uses=1]
+	%tmp276 = fmul float %tmp259, 0.000000e+00		; <float> [#uses=1]
+	%tmp277 = fadd float %tmp269, %tmp276		; <float> [#uses=2]
 	%tmp281 = getelementptr float* %fz, i32 0		; <float*> [#uses=1]
 	%tmp282 = load float* %tmp281, align 4		; <float> [#uses=2]
-	%tmp284 = sub float %tmp282, %tmp277		; <float> [#uses=1]
-	%tmp291 = add float %tmp282, %tmp277		; <float> [#uses=1]
-	%tmp298 = sub float 0.000000e+00, %tmp262		; <float> [#uses=1]
-	%tmp305 = add float 0.000000e+00, %tmp262		; <float> [#uses=1]
-	%tmp315 = mul float 0.000000e+00, %tmp291		; <float> [#uses=1]
-	%tmp318 = mul float 0.000000e+00, %tmp298		; <float> [#uses=1]
-	%tmp319 = add float %tmp315, %tmp318		; <float> [#uses=1]
-	%tmp329 = add float 0.000000e+00, %tmp319		; <float> [#uses=1]
+	%tmp284 = fsub float %tmp282, %tmp277		; <float> [#uses=1]
+	%tmp291 = fadd float %tmp282, %tmp277		; <float> [#uses=1]
+	%tmp298 = fsub float 0.000000e+00, %tmp262		; <float> [#uses=1]
+	%tmp305 = fadd float 0.000000e+00, %tmp262		; <float> [#uses=1]
+	%tmp315 = fmul float 0.000000e+00, %tmp291		; <float> [#uses=1]
+	%tmp318 = fmul float 0.000000e+00, %tmp298		; <float> [#uses=1]
+	%tmp319 = fadd float %tmp315, %tmp318		; <float> [#uses=1]
+	%tmp329 = fadd float 0.000000e+00, %tmp319		; <float> [#uses=1]
 	store float %tmp329, float* null, align 4
-	%tmp336 = sub float %tmp242, 0.000000e+00		; <float> [#uses=1]
+	%tmp336 = fsub float %tmp242, 0.000000e+00		; <float> [#uses=1]
 	store float %tmp336, float* %tmp258, align 4
-	%tmp343 = add float %tmp242, 0.000000e+00		; <float> [#uses=1]
+	%tmp343 = fadd float %tmp242, 0.000000e+00		; <float> [#uses=1]
 	store float %tmp343, float* null, align 4
-	%tmp355 = mul float 0.000000e+00, %tmp305		; <float> [#uses=1]
-	%tmp358 = mul float 0.000000e+00, %tmp284		; <float> [#uses=1]
-	%tmp359 = add float %tmp355, %tmp358		; <float> [#uses=1]
-	%tmp369 = add float 0.000000e+00, %tmp359		; <float> [#uses=1]
+	%tmp355 = fmul float 0.000000e+00, %tmp305		; <float> [#uses=1]
+	%tmp358 = fmul float 0.000000e+00, %tmp284		; <float> [#uses=1]
+	%tmp359 = fadd float %tmp355, %tmp358		; <float> [#uses=1]
+	%tmp369 = fadd float 0.000000e+00, %tmp359		; <float> [#uses=1]
 	store float %tmp369, float* %gi.1, align 4
 	ret void
 
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
index 7fe8eaf3abc7..4ae4d2f9e8d9 100644
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -45,7 +45,7 @@ cond_true.i34.i:		; preds = %xit.i
 cond_next.i79.i:		; preds = %xit.i
 	%phitmp167.i = fptosi double 0.000000e+00 to i64		; <i64> [#uses=1]
 	%tmp142143.i = fpext float %tmp6162.i.i to double		; <double> [#uses=1]
-	%tmp2.i139.i = add double %tmp142143.i, 5.000000e-01		; <double> [#uses=1]
+	%tmp2.i139.i = fadd double %tmp142143.i, 5.000000e-01		; <double> [#uses=1]
 	%tmp23.i140.i = fptosi double %tmp2.i139.i to i64		; <i64> [#uses=1]
 	br i1 false, label %cond_true.i143.i, label %round_coord.exit148.i
 
@@ -60,7 +60,7 @@ round_coord.exit148.i:		; preds = %cond_true.i143.i, %cond_next.i79.i
 	%tmp144149.i = phi i32 [ 32767, %cond_next.i79.i ], [ -32767, %cond_true.i143.i ]		; <i32> [#uses=1]
 	store i32 %tmp144149.i, i32* null, align 8
 	%tmp147148.i = fpext float %tmp67.i15.i to double		; <double> [#uses=1]
-	%tmp2.i128.i = add double %tmp147148.i, 5.000000e-01		; <double> [#uses=1]
+	%tmp2.i128.i = fadd double %tmp147148.i, 5.000000e-01		; <double> [#uses=1]
 	%tmp23.i129.i = fptosi double %tmp2.i128.i to i64		; <i64> [#uses=2]
 	%tmp5.i130.i = icmp slt i64 %tmp23.i129.i, 32768		; <i1> [#uses=1]
 	br i1 %tmp5.i130.i, label %cond_true.i132.i, label %round_coord.exit137.i
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index 605fb551205b..a4e44e1f4e1d 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -13,8 +13,8 @@ bb18:		; preds = %bb18, %entry
 	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
 	%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
 	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
-	%tmp11 = mul float %tmp9, %tmp45		; <float> [#uses=1]
-	%tmp14 = add float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
+	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
+	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
 	%tmp17 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%tmp21 = icmp ult i32 %tmp17, %c		; <i1> [#uses=1]
 	br i1 %tmp21, label %bb18, label %bb23
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 1b36fcec67ab..46422bcf2c50 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -41,8 +41,8 @@ bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i
 	%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0		; <i32> [#uses=1]
 	%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=0]
 	%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double		; <double> [#uses=1]
-	%tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1]
-	%tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1]
+	%tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1]
+	%tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1]
 	%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=2]
 	%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i		; <i1> [#uses=1]
 	br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
index 84229cf49128..cb7a3dcd33cb 100644
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -288,7 +288,7 @@ invcont1640:		; preds = %tmp9.i3799.noexc
 			to label %invcont1642 unwind label %lpad3845		; <i8*> [#uses=0]
 
 invcont1642:		; preds = %invcont1640
-	%tmp18.i3770 = sub double %tmp3.i3778, 0.000000e+00		; <double> [#uses=0]
+	%tmp18.i3770 = fsub double %tmp3.i3778, 0.000000e+00		; <double> [#uses=0]
 	invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
 			to label %bb3743 unwind label %lpad3845
 
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 83ca3e3ac720..38020c1e3ea8 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -4,29 +4,29 @@ define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %soluti
 entry:
 	%tmp71 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
-	%tmp73 = add x86_fp80 0xK00000000000000000000, %tmp72		; <x86_fp80> [#uses=1]
+	%tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72		; <x86_fp80> [#uses=1]
 	%tmp7374 = fptrunc x86_fp80 %tmp73 to double		; <double> [#uses=1]
 	store double %tmp7374, double* null, align 8
 	%tmp81 = load double* null, align 8		; <double> [#uses=1]
-	%tmp82 = add double %tmp81, 0x401921FB54442D18		; <double> [#uses=1]
+	%tmp82 = fadd double %tmp81, 0x401921FB54442D18		; <double> [#uses=1]
 	%tmp83 = fdiv double %tmp82, 3.000000e+00		; <double> [#uses=1]
 	%tmp84 = call double @cos( double %tmp83 )		; <double> [#uses=1]
-	%tmp85 = mul double 0.000000e+00, %tmp84		; <double> [#uses=1]
+	%tmp85 = fmul double 0.000000e+00, %tmp84		; <double> [#uses=1]
 	%tmp8586 = fpext double %tmp85 to x86_fp80		; <x86_fp80> [#uses=1]
 	%tmp87 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
-	%tmp89 = add x86_fp80 %tmp8586, %tmp88		; <x86_fp80> [#uses=1]
+	%tmp89 = fadd x86_fp80 %tmp8586, %tmp88		; <x86_fp80> [#uses=1]
 	%tmp8990 = fptrunc x86_fp80 %tmp89 to double		; <double> [#uses=1]
 	store double %tmp8990, double* null, align 8
 	%tmp97 = load double* null, align 8		; <double> [#uses=1]
-	%tmp98 = add double %tmp97, 0x402921FB54442D18		; <double> [#uses=1]
+	%tmp98 = fadd double %tmp97, 0x402921FB54442D18		; <double> [#uses=1]
 	%tmp99 = fdiv double %tmp98, 3.000000e+00		; <double> [#uses=1]
 	%tmp100 = call double @cos( double %tmp99 )		; <double> [#uses=1]
-	%tmp101 = mul double 0.000000e+00, %tmp100		; <double> [#uses=1]
+	%tmp101 = fmul double 0.000000e+00, %tmp100		; <double> [#uses=1]
 	%tmp101102 = fpext double %tmp101 to x86_fp80		; <x86_fp80> [#uses=1]
 	%tmp103 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
-	%tmp105 = add x86_fp80 %tmp101102, %tmp104		; <x86_fp80> [#uses=1]
+	%tmp105 = fadd x86_fp80 %tmp101102, %tmp104		; <x86_fp80> [#uses=1]
 	%tmp105106 = fptrunc x86_fp80 %tmp105 to double		; <double> [#uses=1]
 	store double %tmp105106, double* null, align 8
 	ret void
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index f1300fa3d79e..6db6537aed26 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -3,13 +3,13 @@
 define void @casin({ double, double }* sret  %agg.result, double %z.0, double %z.1) nounwind  {
 entry:
 	%memtmp = alloca { double, double }, align 8		; <{ double, double }*> [#uses=3]
-	%tmp4 = sub double -0.000000e+00, %z.1		; <double> [#uses=1]
+	%tmp4 = fsub double -0.000000e+00, %z.1		; <double> [#uses=1]
 	call void @casinh( { double, double }* sret  %memtmp, double %tmp4, double %z.0 ) nounwind 
 	%tmp19 = getelementptr { double, double }* %memtmp, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp20 = load double* %tmp19, align 8		; <double> [#uses=1]
 	%tmp22 = getelementptr { double, double }* %memtmp, i32 0, i32 1		; <double*> [#uses=1]
 	%tmp23 = load double* %tmp22, align 8		; <double> [#uses=1]
-	%tmp32 = sub double -0.000000e+00, %tmp20		; <double> [#uses=1]
+	%tmp32 = fsub double -0.000000e+00, %tmp20		; <double> [#uses=1]
 	%tmp37 = getelementptr { double, double }* %agg.result, i32 0, i32 0		; <double*> [#uses=1]
 	store double %tmp23, double* %tmp37, align 8
 	%tmp40 = getelementptr { double, double }* %agg.result, i32 0, i32 1		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
index b3fe9abd52a3..230af57fea8c 100644
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -33,23 +33,23 @@ bb5.i.i31:		; preds = %bb3.i27
 	br i1 %tmp10.i.i30, label %bb13.i.i37, label %bb30.i.i43
 
 bb13.i.i37:		; preds = %bb5.i.i31
-	%tmp15.i.i32 = sub double -0.000000e+00, %tmp22.i25		; <double> [#uses=1]
+	%tmp15.i.i32 = fsub double -0.000000e+00, %tmp22.i25		; <double> [#uses=1]
 	%tmp17.i.i33 = fdiv double %tmp15.i.i32, %tmp12.i23		; <double> [#uses=3]
-	%tmp20.i4.i = mul double %tmp17.i.i33, %tmp17.i.i33		; <double> [#uses=1]
-	%tmp21.i.i34 = add double %tmp20.i4.i, 1.000000e+00		; <double> [#uses=1]
+	%tmp20.i4.i = fmul double %tmp17.i.i33, %tmp17.i.i33		; <double> [#uses=1]
+	%tmp21.i.i34 = fadd double %tmp20.i4.i, 1.000000e+00		; <double> [#uses=1]
 	%tmp22.i.i35 = call double @llvm.sqrt.f64( double %tmp21.i.i34 ) nounwind 		; <double> [#uses=1]
 	%tmp23.i5.i = fdiv double 1.000000e+00, %tmp22.i.i35		; <double> [#uses=2]
-	%tmp28.i.i36 = mul double %tmp23.i5.i, %tmp17.i.i33		; <double> [#uses=1]
+	%tmp28.i.i36 = fmul double %tmp23.i5.i, %tmp17.i.i33		; <double> [#uses=1]
 	br label %Givens.exit.i49
 
 bb30.i.i43:		; preds = %bb5.i.i31
-	%tmp32.i.i38 = sub double -0.000000e+00, %tmp12.i23		; <double> [#uses=1]
+	%tmp32.i.i38 = fsub double -0.000000e+00, %tmp12.i23		; <double> [#uses=1]
 	%tmp34.i.i39 = fdiv double %tmp32.i.i38, %tmp22.i25		; <double> [#uses=3]
-	%tmp37.i6.i = mul double %tmp34.i.i39, %tmp34.i.i39		; <double> [#uses=1]
-	%tmp38.i.i40 = add double %tmp37.i6.i, 1.000000e+00		; <double> [#uses=1]
+	%tmp37.i6.i = fmul double %tmp34.i.i39, %tmp34.i.i39		; <double> [#uses=1]
+	%tmp38.i.i40 = fadd double %tmp37.i6.i, 1.000000e+00		; <double> [#uses=1]
 	%tmp39.i7.i = call double @llvm.sqrt.f64( double %tmp38.i.i40 ) nounwind 		; <double> [#uses=1]
 	%tmp40.i.i41 = fdiv double 1.000000e+00, %tmp39.i7.i		; <double> [#uses=2]
-	%tmp45.i.i42 = mul double %tmp40.i.i41, %tmp34.i.i39		; <double> [#uses=1]
+	%tmp45.i.i42 = fmul double %tmp40.i.i41, %tmp34.i.i39		; <double> [#uses=1]
 	br label %Givens.exit.i49
 
 Givens.exit.i49:		; preds = %bb3.i27.Givens.exit.i49_crit_edge, %bb30.i.i43, %bb13.i.i37
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index 96ac7a7608c8..fe0ee8a8faaf 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -24,20 +24,20 @@ bb35:		; preds = %bb24, %entry
 	%tmp42 = sdiv i32 %i, 9		; <i32> [#uses=1]
 	%tmp43 = add i32 %tmp42, -1		; <i32> [#uses=1]
 	%tmp4344 = sitofp i32 %tmp43 to double		; <double> [#uses=1]
-	%tmp17.i76 = mul double %tmp4344, 0.000000e+00		; <double> [#uses=1]
+	%tmp17.i76 = fmul double %tmp4344, 0.000000e+00		; <double> [#uses=1]
 	%tmp48 = sdiv i32 %i, 3		; <i32> [#uses=1]
 	%tmp49 = srem i32 %tmp48, 3		; <i32> [#uses=1]
 	%tmp50 = add i32 %tmp49, -1		; <i32> [#uses=1]
 	%tmp5051 = sitofp i32 %tmp50 to double		; <double> [#uses=1]
-	%tmp17.i63 = mul double %tmp5051, 0.000000e+00		; <double> [#uses=1]
+	%tmp17.i63 = fmul double %tmp5051, 0.000000e+00		; <double> [#uses=1]
 	%tmp55 = srem i32 %i, 3		; <i32> [#uses=1]
 	%tmp56 = add i32 %tmp55, -1		; <i32> [#uses=1]
 	%tmp5657 = sitofp i32 %tmp56 to double		; <double> [#uses=1]
 	%tmp15.i49 = getelementptr %struct.Lattice* %this, i32 0, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp16.i50 = load double* %tmp15.i49, align 4		; <double> [#uses=1]
-	%tmp17.i = mul double %tmp5657, %tmp16.i50		; <double> [#uses=1]
-	%tmp20.i39 = add double %tmp17.i, %tmp17.i63		; <double> [#uses=1]
-	%tmp20.i23 = add double %tmp20.i39, %tmp17.i76		; <double> [#uses=1]
+	%tmp17.i = fmul double %tmp5657, %tmp16.i50		; <double> [#uses=1]
+	%tmp20.i39 = fadd double %tmp17.i, %tmp17.i63		; <double> [#uses=1]
+	%tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76		; <double> [#uses=1]
 	br i1 false, label %bb58.preheader, label %bb81
 
 bb58.preheader:		; preds = %bb35
@@ -55,7 +55,7 @@ bb58:		; preds = %bb58, %bb58.preheader
 	%tmp95.i = and i32 %tmp88.i, -268435456		; <i32> [#uses=1]
 	%tmp97.i = or i32 0, %tmp95.i		; <i32> [#uses=1]
 	store i32 %tmp97.i, i32* %tmp25.i, align 1
-	%tmp6.i = add double 0.000000e+00, %tmp20.i23		; <double> [#uses=0]
+	%tmp6.i = fadd double 0.000000e+00, %tmp20.i23		; <double> [#uses=0]
 	%exitcond96 = icmp eq i32 0, %smax		; <i1> [#uses=1]
 	br i1 %exitcond96, label %bb81, label %bb58
 
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
index b644d8f5863a..055eabb43a63 100644
--- a/test/CodeGen/X86/2008-02-27-PEICrash.ll
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -13,21 +13,21 @@ bb56:		; preds = %bb33, %entry
 	%a.pn = phi float [ %a, %bb33 ], [ %b, %entry ]		; <float> [#uses=1]
 	%tmp41.pn508 = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ]		; <float> [#uses=1]
 	%tmp51.pn = phi float [ 0.000000e+00, %bb33 ], [ %a, %entry ]		; <float> [#uses=1]
-	%tmp44.pn = mul float %tmp36.pn, %b.pn509		; <float> [#uses=1]
-	%tmp46.pn = add float %tmp44.pn, %a.pn		; <float> [#uses=1]
-	%tmp53.pn = sub float 0.000000e+00, %tmp51.pn		; <float> [#uses=1]
+	%tmp44.pn = fmul float %tmp36.pn, %b.pn509		; <float> [#uses=1]
+	%tmp46.pn = fadd float %tmp44.pn, %a.pn		; <float> [#uses=1]
+	%tmp53.pn = fsub float 0.000000e+00, %tmp51.pn		; <float> [#uses=1]
 	%x.0 = fdiv float %tmp46.pn, %tmp41.pn508		; <float> [#uses=1]
 	%y.0 = fdiv float %tmp53.pn, 0.000000e+00		; <float> [#uses=1]
 	br i1 false, label %bb433, label %bb98
 
 bb98:		; preds = %bb56
-	%tmp102 = mul float 0.000000e+00, %a		; <float> [#uses=1]
-	%tmp106 = mul float 0.000000e+00, %b		; <float> [#uses=1]
+	%tmp102 = fmul float 0.000000e+00, %a		; <float> [#uses=1]
+	%tmp106 = fmul float 0.000000e+00, %b		; <float> [#uses=1]
 	br label %bb433
 
 bb433:		; preds = %bb98, %bb56
 	%x.1 = phi float [ %tmp102, %bb98 ], [ %x.0, %bb56 ]		; <float> [#uses=0]
 	%y.1 = phi float [ %tmp106, %bb98 ], [ %y.0, %bb56 ]		; <float> [#uses=1]
-	%tmp460 = add float %y.1, 0.000000e+00		; <float> [#uses=0]
+	%tmp460 = fadd float %y.1, 0.000000e+00		; <float> [#uses=0]
 	ret i64 0
 }
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index c3b4a257353a..4b6758d6833c 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -14,7 +14,7 @@ entry:
 	%tmp30 = icmp sgt i32 %delta, 0		; <i1> [#uses=1]
 	br i1 %tmp30, label %bb33, label %bb87.preheader
 bb33:		; preds = %entry
-	%tmp28 = add float 0.000000e+00, %tmp24		; <float> [#uses=1]
+	%tmp28 = fadd float 0.000000e+00, %tmp24		; <float> [#uses=1]
 	%tmp35 = fcmp ogt float %tmp28, 1.800000e+01		; <i1> [#uses=1]
 	br i1 %tmp35, label %bb38, label %bb87.preheader
 bb38:		; preds = %bb33
@@ -24,7 +24,7 @@ bb43:		; preds = %bb38
 	store i32 %tmp53, i32* null, align 4
 	ret void
 bb50:		; preds = %bb38
-	%tmp56 = sub float 1.800000e+01, %tmp24		; <float> [#uses=1]
+	%tmp56 = fsub float 1.800000e+01, %tmp24		; <float> [#uses=1]
 	%tmp57 = fcmp ugt float 0.000000e+00, %tmp56		; <i1> [#uses=1]
 	br i1 %tmp57, label %bb64, label %bb87.preheader
 bb64:		; preds = %bb50
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
index 1e5ab99a9c88..53bb054795ec 100644
--- a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -3,10 +3,10 @@
 define void @t() {
 entry:
 	%tmp455 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 0, i32 3, i32 2 >		; <<4 x float>> [#uses=1]
-	%tmp457 = mul <4 x float> zeroinitializer, %tmp455		; <<4 x float>> [#uses=2]
+	%tmp457 = fmul <4 x float> zeroinitializer, %tmp455		; <<4 x float>> [#uses=2]
 	%tmp461 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>> [#uses=1]
 	%tmp465 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=1]
-	%tmp466 = sub <4 x float> %tmp461, %tmp465		; <<4 x float>> [#uses=1]
+	%tmp466 = fsub <4 x float> %tmp461, %tmp465		; <<4 x float>> [#uses=1]
 	%tmp536 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp466, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
 	%tmp542 = shufflevector <4 x float> %tmp536, <4 x float> zeroinitializer, <4 x i32> < i32 6, i32 7, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
 	%tmp580 = bitcast <4 x float> %tmp542 to <4 x i32>		; <<4 x i32>> [#uses=1]
@@ -15,10 +15,10 @@ entry:
 	%tmp592 = bitcast <4 x i32> %tmp591 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp609 = fdiv <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, %tmp592		; <<4 x float>> [#uses=1]
 	%tmp652 = shufflevector <4 x float> %tmp609, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>> [#uses=1]
-	%tmp662 = mul <4 x float> zeroinitializer, %tmp652		; <<4 x float>> [#uses=1]
+	%tmp662 = fmul <4 x float> zeroinitializer, %tmp652		; <<4 x float>> [#uses=1]
 	%tmp678 = shufflevector <4 x float> %tmp662, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=1]
-	%tmp753 = mul <4 x float> zeroinitializer, %tmp678		; <<4 x float>> [#uses=1]
-	%tmp754 = sub <4 x float> zeroinitializer, %tmp753		; <<4 x float>> [#uses=1]
+	%tmp753 = fmul <4 x float> zeroinitializer, %tmp678		; <<4 x float>> [#uses=1]
+	%tmp754 = fsub <4 x float> zeroinitializer, %tmp753		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp754, <4 x float>* null, align 16
 	unreachable
 }
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 880035715f83..ae30385e13e6 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -70,567 +70,567 @@ define void @""() {
 	load <4 x float>* @29, align 1		; <<4 x float>>:30 [#uses=31]
 	load <4 x float>* @30, align 1		; <<4 x float>>:31 [#uses=32]
 	load <4 x float>* @31, align 1		; <<4 x float>>:32 [#uses=33]
-	mul <4 x float> %1, %1		; <<4 x float>>:33 [#uses=1]
-	mul <4 x float> %33, %2		; <<4 x float>>:34 [#uses=1]
-	mul <4 x float> %34, %3		; <<4 x float>>:35 [#uses=1]
-	mul <4 x float> %35, %4		; <<4 x float>>:36 [#uses=1]
-	mul <4 x float> %36, %5		; <<4 x float>>:37 [#uses=1]
-	mul <4 x float> %37, %6		; <<4 x float>>:38 [#uses=1]
-	mul <4 x float> %38, %7		; <<4 x float>>:39 [#uses=1]
-	mul <4 x float> %39, %8		; <<4 x float>>:40 [#uses=1]
-	mul <4 x float> %40, %9		; <<4 x float>>:41 [#uses=1]
-	mul <4 x float> %41, %10		; <<4 x float>>:42 [#uses=1]
-	mul <4 x float> %42, %11		; <<4 x float>>:43 [#uses=1]
-	mul <4 x float> %43, %12		; <<4 x float>>:44 [#uses=1]
-	mul <4 x float> %44, %13		; <<4 x float>>:45 [#uses=1]
-	mul <4 x float> %45, %14		; <<4 x float>>:46 [#uses=1]
-	mul <4 x float> %46, %15		; <<4 x float>>:47 [#uses=1]
-	mul <4 x float> %47, %16		; <<4 x float>>:48 [#uses=1]
-	mul <4 x float> %48, %17		; <<4 x float>>:49 [#uses=1]
-	mul <4 x float> %49, %18		; <<4 x float>>:50 [#uses=1]
-	mul <4 x float> %50, %19		; <<4 x float>>:51 [#uses=1]
-	mul <4 x float> %51, %20		; <<4 x float>>:52 [#uses=1]
-	mul <4 x float> %52, %21		; <<4 x float>>:53 [#uses=1]
-	mul <4 x float> %53, %22		; <<4 x float>>:54 [#uses=1]
-	mul <4 x float> %54, %23		; <<4 x float>>:55 [#uses=1]
-	mul <4 x float> %55, %24		; <<4 x float>>:56 [#uses=1]
-	mul <4 x float> %56, %25		; <<4 x float>>:57 [#uses=1]
-	mul <4 x float> %57, %26		; <<4 x float>>:58 [#uses=1]
-	mul <4 x float> %58, %27		; <<4 x float>>:59 [#uses=1]
-	mul <4 x float> %59, %28		; <<4 x float>>:60 [#uses=1]
-	mul <4 x float> %60, %29		; <<4 x float>>:61 [#uses=1]
-	mul <4 x float> %61, %30		; <<4 x float>>:62 [#uses=1]
-	mul <4 x float> %62, %31		; <<4 x float>>:63 [#uses=1]
-	mul <4 x float> %63, %32		; <<4 x float>>:64 [#uses=3]
-	mul <4 x float> %2, %2		; <<4 x float>>:65 [#uses=1]
-	mul <4 x float> %65, %3		; <<4 x float>>:66 [#uses=1]
-	mul <4 x float> %66, %4		; <<4 x float>>:67 [#uses=1]
-	mul <4 x float> %67, %5		; <<4 x float>>:68 [#uses=1]
-	mul <4 x float> %68, %6		; <<4 x float>>:69 [#uses=1]
-	mul <4 x float> %69, %7		; <<4 x float>>:70 [#uses=1]
-	mul <4 x float> %70, %8		; <<4 x float>>:71 [#uses=1]
-	mul <4 x float> %71, %9		; <<4 x float>>:72 [#uses=1]
-	mul <4 x float> %72, %10		; <<4 x float>>:73 [#uses=1]
-	mul <4 x float> %73, %11		; <<4 x float>>:74 [#uses=1]
-	mul <4 x float> %74, %12		; <<4 x float>>:75 [#uses=1]
-	mul <4 x float> %75, %13		; <<4 x float>>:76 [#uses=1]
-	mul <4 x float> %76, %14		; <<4 x float>>:77 [#uses=1]
-	mul <4 x float> %77, %15		; <<4 x float>>:78 [#uses=1]
-	mul <4 x float> %78, %16		; <<4 x float>>:79 [#uses=1]
-	mul <4 x float> %79, %17		; <<4 x float>>:80 [#uses=1]
-	mul <4 x float> %80, %18		; <<4 x float>>:81 [#uses=1]
-	mul <4 x float> %81, %19		; <<4 x float>>:82 [#uses=1]
-	mul <4 x float> %82, %20		; <<4 x float>>:83 [#uses=1]
-	mul <4 x float> %83, %21		; <<4 x float>>:84 [#uses=1]
-	mul <4 x float> %84, %22		; <<4 x float>>:85 [#uses=1]
-	mul <4 x float> %85, %23		; <<4 x float>>:86 [#uses=1]
-	mul <4 x float> %86, %24		; <<4 x float>>:87 [#uses=1]
-	mul <4 x float> %87, %25		; <<4 x float>>:88 [#uses=1]
-	mul <4 x float> %88, %26		; <<4 x float>>:89 [#uses=1]
-	mul <4 x float> %89, %27		; <<4 x float>>:90 [#uses=1]
-	mul <4 x float> %90, %28		; <<4 x float>>:91 [#uses=1]
-	mul <4 x float> %91, %29		; <<4 x float>>:92 [#uses=1]
-	mul <4 x float> %92, %30		; <<4 x float>>:93 [#uses=1]
-	mul <4 x float> %93, %31		; <<4 x float>>:94 [#uses=1]
-	mul <4 x float> %94, %32		; <<4 x float>>:95 [#uses=1]
-	mul <4 x float> %3, %3		; <<4 x float>>:96 [#uses=1]
-	mul <4 x float> %96, %4		; <<4 x float>>:97 [#uses=1]
-	mul <4 x float> %97, %5		; <<4 x float>>:98 [#uses=1]
-	mul <4 x float> %98, %6		; <<4 x float>>:99 [#uses=1]
-	mul <4 x float> %99, %7		; <<4 x float>>:100 [#uses=1]
-	mul <4 x float> %100, %8		; <<4 x float>>:101 [#uses=1]
-	mul <4 x float> %101, %9		; <<4 x float>>:102 [#uses=1]
-	mul <4 x float> %102, %10		; <<4 x float>>:103 [#uses=1]
-	mul <4 x float> %103, %11		; <<4 x float>>:104 [#uses=1]
-	mul <4 x float> %104, %12		; <<4 x float>>:105 [#uses=1]
-	mul <4 x float> %105, %13		; <<4 x float>>:106 [#uses=1]
-	mul <4 x float> %106, %14		; <<4 x float>>:107 [#uses=1]
-	mul <4 x float> %107, %15		; <<4 x float>>:108 [#uses=1]
-	mul <4 x float> %108, %16		; <<4 x float>>:109 [#uses=1]
-	mul <4 x float> %109, %17		; <<4 x float>>:110 [#uses=1]
-	mul <4 x float> %110, %18		; <<4 x float>>:111 [#uses=1]
-	mul <4 x float> %111, %19		; <<4 x float>>:112 [#uses=1]
-	mul <4 x float> %112, %20		; <<4 x float>>:113 [#uses=1]
-	mul <4 x float> %113, %21		; <<4 x float>>:114 [#uses=1]
-	mul <4 x float> %114, %22		; <<4 x float>>:115 [#uses=1]
-	mul <4 x float> %115, %23		; <<4 x float>>:116 [#uses=1]
-	mul <4 x float> %116, %24		; <<4 x float>>:117 [#uses=1]
-	mul <4 x float> %117, %25		; <<4 x float>>:118 [#uses=1]
-	mul <4 x float> %118, %26		; <<4 x float>>:119 [#uses=1]
-	mul <4 x float> %119, %27		; <<4 x float>>:120 [#uses=1]
-	mul <4 x float> %120, %28		; <<4 x float>>:121 [#uses=1]
-	mul <4 x float> %121, %29		; <<4 x float>>:122 [#uses=1]
-	mul <4 x float> %122, %30		; <<4 x float>>:123 [#uses=1]
-	mul <4 x float> %123, %31		; <<4 x float>>:124 [#uses=1]
-	mul <4 x float> %124, %32		; <<4 x float>>:125 [#uses=1]
-	mul <4 x float> %4, %4		; <<4 x float>>:126 [#uses=1]
-	mul <4 x float> %126, %5		; <<4 x float>>:127 [#uses=1]
-	mul <4 x float> %127, %6		; <<4 x float>>:128 [#uses=1]
-	mul <4 x float> %128, %7		; <<4 x float>>:129 [#uses=1]
-	mul <4 x float> %129, %8		; <<4 x float>>:130 [#uses=1]
-	mul <4 x float> %130, %9		; <<4 x float>>:131 [#uses=1]
-	mul <4 x float> %131, %10		; <<4 x float>>:132 [#uses=1]
-	mul <4 x float> %132, %11		; <<4 x float>>:133 [#uses=1]
-	mul <4 x float> %133, %12		; <<4 x float>>:134 [#uses=1]
-	mul <4 x float> %134, %13		; <<4 x float>>:135 [#uses=1]
-	mul <4 x float> %135, %14		; <<4 x float>>:136 [#uses=1]
-	mul <4 x float> %136, %15		; <<4 x float>>:137 [#uses=1]
-	mul <4 x float> %137, %16		; <<4 x float>>:138 [#uses=1]
-	mul <4 x float> %138, %17		; <<4 x float>>:139 [#uses=1]
-	mul <4 x float> %139, %18		; <<4 x float>>:140 [#uses=1]
-	mul <4 x float> %140, %19		; <<4 x float>>:141 [#uses=1]
-	mul <4 x float> %141, %20		; <<4 x float>>:142 [#uses=1]
-	mul <4 x float> %142, %21		; <<4 x float>>:143 [#uses=1]
-	mul <4 x float> %143, %22		; <<4 x float>>:144 [#uses=1]
-	mul <4 x float> %144, %23		; <<4 x float>>:145 [#uses=1]
-	mul <4 x float> %145, %24		; <<4 x float>>:146 [#uses=1]
-	mul <4 x float> %146, %25		; <<4 x float>>:147 [#uses=1]
-	mul <4 x float> %147, %26		; <<4 x float>>:148 [#uses=1]
-	mul <4 x float> %148, %27		; <<4 x float>>:149 [#uses=1]
-	mul <4 x float> %149, %28		; <<4 x float>>:150 [#uses=1]
-	mul <4 x float> %150, %29		; <<4 x float>>:151 [#uses=1]
-	mul <4 x float> %151, %30		; <<4 x float>>:152 [#uses=1]
-	mul <4 x float> %152, %31		; <<4 x float>>:153 [#uses=1]
-	mul <4 x float> %153, %32		; <<4 x float>>:154 [#uses=1]
-	mul <4 x float> %5, %5		; <<4 x float>>:155 [#uses=1]
-	mul <4 x float> %155, %6		; <<4 x float>>:156 [#uses=1]
-	mul <4 x float> %156, %7		; <<4 x float>>:157 [#uses=1]
-	mul <4 x float> %157, %8		; <<4 x float>>:158 [#uses=1]
-	mul <4 x float> %158, %9		; <<4 x float>>:159 [#uses=1]
-	mul <4 x float> %159, %10		; <<4 x float>>:160 [#uses=1]
-	mul <4 x float> %160, %11		; <<4 x float>>:161 [#uses=1]
-	mul <4 x float> %161, %12		; <<4 x float>>:162 [#uses=1]
-	mul <4 x float> %162, %13		; <<4 x float>>:163 [#uses=1]
-	mul <4 x float> %163, %14		; <<4 x float>>:164 [#uses=1]
-	mul <4 x float> %164, %15		; <<4 x float>>:165 [#uses=1]
-	mul <4 x float> %165, %16		; <<4 x float>>:166 [#uses=1]
-	mul <4 x float> %166, %17		; <<4 x float>>:167 [#uses=1]
-	mul <4 x float> %167, %18		; <<4 x float>>:168 [#uses=1]
-	mul <4 x float> %168, %19		; <<4 x float>>:169 [#uses=1]
-	mul <4 x float> %169, %20		; <<4 x float>>:170 [#uses=1]
-	mul <4 x float> %170, %21		; <<4 x float>>:171 [#uses=1]
-	mul <4 x float> %171, %22		; <<4 x float>>:172 [#uses=1]
-	mul <4 x float> %172, %23		; <<4 x float>>:173 [#uses=1]
-	mul <4 x float> %173, %24		; <<4 x float>>:174 [#uses=1]
-	mul <4 x float> %174, %25		; <<4 x float>>:175 [#uses=1]
-	mul <4 x float> %175, %26		; <<4 x float>>:176 [#uses=1]
-	mul <4 x float> %176, %27		; <<4 x float>>:177 [#uses=1]
-	mul <4 x float> %177, %28		; <<4 x float>>:178 [#uses=1]
-	mul <4 x float> %178, %29		; <<4 x float>>:179 [#uses=1]
-	mul <4 x float> %179, %30		; <<4 x float>>:180 [#uses=1]
-	mul <4 x float> %180, %31		; <<4 x float>>:181 [#uses=1]
-	mul <4 x float> %181, %32		; <<4 x float>>:182 [#uses=1]
-	mul <4 x float> %6, %6		; <<4 x float>>:183 [#uses=1]
-	mul <4 x float> %183, %7		; <<4 x float>>:184 [#uses=1]
-	mul <4 x float> %184, %8		; <<4 x float>>:185 [#uses=1]
-	mul <4 x float> %185, %9		; <<4 x float>>:186 [#uses=1]
-	mul <4 x float> %186, %10		; <<4 x float>>:187 [#uses=1]
-	mul <4 x float> %187, %11		; <<4 x float>>:188 [#uses=1]
-	mul <4 x float> %188, %12		; <<4 x float>>:189 [#uses=1]
-	mul <4 x float> %189, %13		; <<4 x float>>:190 [#uses=1]
-	mul <4 x float> %190, %14		; <<4 x float>>:191 [#uses=1]
-	mul <4 x float> %191, %15		; <<4 x float>>:192 [#uses=1]
-	mul <4 x float> %192, %16		; <<4 x float>>:193 [#uses=1]
-	mul <4 x float> %193, %17		; <<4 x float>>:194 [#uses=1]
-	mul <4 x float> %194, %18		; <<4 x float>>:195 [#uses=1]
-	mul <4 x float> %195, %19		; <<4 x float>>:196 [#uses=1]
-	mul <4 x float> %196, %20		; <<4 x float>>:197 [#uses=1]
-	mul <4 x float> %197, %21		; <<4 x float>>:198 [#uses=1]
-	mul <4 x float> %198, %22		; <<4 x float>>:199 [#uses=1]
-	mul <4 x float> %199, %23		; <<4 x float>>:200 [#uses=1]
-	mul <4 x float> %200, %24		; <<4 x float>>:201 [#uses=1]
-	mul <4 x float> %201, %25		; <<4 x float>>:202 [#uses=1]
-	mul <4 x float> %202, %26		; <<4 x float>>:203 [#uses=1]
-	mul <4 x float> %203, %27		; <<4 x float>>:204 [#uses=1]
-	mul <4 x float> %204, %28		; <<4 x float>>:205 [#uses=1]
-	mul <4 x float> %205, %29		; <<4 x float>>:206 [#uses=1]
-	mul <4 x float> %206, %30		; <<4 x float>>:207 [#uses=1]
-	mul <4 x float> %207, %31		; <<4 x float>>:208 [#uses=1]
-	mul <4 x float> %208, %32		; <<4 x float>>:209 [#uses=1]
-	mul <4 x float> %7, %7		; <<4 x float>>:210 [#uses=1]
-	mul <4 x float> %210, %8		; <<4 x float>>:211 [#uses=1]
-	mul <4 x float> %211, %9		; <<4 x float>>:212 [#uses=1]
-	mul <4 x float> %212, %10		; <<4 x float>>:213 [#uses=1]
-	mul <4 x float> %213, %11		; <<4 x float>>:214 [#uses=1]
-	mul <4 x float> %214, %12		; <<4 x float>>:215 [#uses=1]
-	mul <4 x float> %215, %13		; <<4 x float>>:216 [#uses=1]
-	mul <4 x float> %216, %14		; <<4 x float>>:217 [#uses=1]
-	mul <4 x float> %217, %15		; <<4 x float>>:218 [#uses=1]
-	mul <4 x float> %218, %16		; <<4 x float>>:219 [#uses=1]
-	mul <4 x float> %219, %17		; <<4 x float>>:220 [#uses=1]
-	mul <4 x float> %220, %18		; <<4 x float>>:221 [#uses=1]
-	mul <4 x float> %221, %19		; <<4 x float>>:222 [#uses=1]
-	mul <4 x float> %222, %20		; <<4 x float>>:223 [#uses=1]
-	mul <4 x float> %223, %21		; <<4 x float>>:224 [#uses=1]
-	mul <4 x float> %224, %22		; <<4 x float>>:225 [#uses=1]
-	mul <4 x float> %225, %23		; <<4 x float>>:226 [#uses=1]
-	mul <4 x float> %226, %24		; <<4 x float>>:227 [#uses=1]
-	mul <4 x float> %227, %25		; <<4 x float>>:228 [#uses=1]
-	mul <4 x float> %228, %26		; <<4 x float>>:229 [#uses=1]
-	mul <4 x float> %229, %27		; <<4 x float>>:230 [#uses=1]
-	mul <4 x float> %230, %28		; <<4 x float>>:231 [#uses=1]
-	mul <4 x float> %231, %29		; <<4 x float>>:232 [#uses=1]
-	mul <4 x float> %232, %30		; <<4 x float>>:233 [#uses=1]
-	mul <4 x float> %233, %31		; <<4 x float>>:234 [#uses=1]
-	mul <4 x float> %234, %32		; <<4 x float>>:235 [#uses=1]
-	mul <4 x float> %8, %8		; <<4 x float>>:236 [#uses=1]
-	mul <4 x float> %236, %9		; <<4 x float>>:237 [#uses=1]
-	mul <4 x float> %237, %10		; <<4 x float>>:238 [#uses=1]
-	mul <4 x float> %238, %11		; <<4 x float>>:239 [#uses=1]
-	mul <4 x float> %239, %12		; <<4 x float>>:240 [#uses=1]
-	mul <4 x float> %240, %13		; <<4 x float>>:241 [#uses=1]
-	mul <4 x float> %241, %14		; <<4 x float>>:242 [#uses=1]
-	mul <4 x float> %242, %15		; <<4 x float>>:243 [#uses=1]
-	mul <4 x float> %243, %16		; <<4 x float>>:244 [#uses=1]
-	mul <4 x float> %244, %17		; <<4 x float>>:245 [#uses=1]
-	mul <4 x float> %245, %18		; <<4 x float>>:246 [#uses=1]
-	mul <4 x float> %246, %19		; <<4 x float>>:247 [#uses=1]
-	mul <4 x float> %247, %20		; <<4 x float>>:248 [#uses=1]
-	mul <4 x float> %248, %21		; <<4 x float>>:249 [#uses=1]
-	mul <4 x float> %249, %22		; <<4 x float>>:250 [#uses=1]
-	mul <4 x float> %250, %23		; <<4 x float>>:251 [#uses=1]
-	mul <4 x float> %251, %24		; <<4 x float>>:252 [#uses=1]
-	mul <4 x float> %252, %25		; <<4 x float>>:253 [#uses=1]
-	mul <4 x float> %253, %26		; <<4 x float>>:254 [#uses=1]
-	mul <4 x float> %254, %27		; <<4 x float>>:255 [#uses=1]
-	mul <4 x float> %255, %28		; <<4 x float>>:256 [#uses=1]
-	mul <4 x float> %256, %29		; <<4 x float>>:257 [#uses=1]
-	mul <4 x float> %257, %30		; <<4 x float>>:258 [#uses=1]
-	mul <4 x float> %258, %31		; <<4 x float>>:259 [#uses=1]
-	mul <4 x float> %259, %32		; <<4 x float>>:260 [#uses=1]
-	mul <4 x float> %9, %9		; <<4 x float>>:261 [#uses=1]
-	mul <4 x float> %261, %10		; <<4 x float>>:262 [#uses=1]
-	mul <4 x float> %262, %11		; <<4 x float>>:263 [#uses=1]
-	mul <4 x float> %263, %12		; <<4 x float>>:264 [#uses=1]
-	mul <4 x float> %264, %13		; <<4 x float>>:265 [#uses=1]
-	mul <4 x float> %265, %14		; <<4 x float>>:266 [#uses=1]
-	mul <4 x float> %266, %15		; <<4 x float>>:267 [#uses=1]
-	mul <4 x float> %267, %16		; <<4 x float>>:268 [#uses=1]
-	mul <4 x float> %268, %17		; <<4 x float>>:269 [#uses=1]
-	mul <4 x float> %269, %18		; <<4 x float>>:270 [#uses=1]
-	mul <4 x float> %270, %19		; <<4 x float>>:271 [#uses=1]
-	mul <4 x float> %271, %20		; <<4 x float>>:272 [#uses=1]
-	mul <4 x float> %272, %21		; <<4 x float>>:273 [#uses=1]
-	mul <4 x float> %273, %22		; <<4 x float>>:274 [#uses=1]
-	mul <4 x float> %274, %23		; <<4 x float>>:275 [#uses=1]
-	mul <4 x float> %275, %24		; <<4 x float>>:276 [#uses=1]
-	mul <4 x float> %276, %25		; <<4 x float>>:277 [#uses=1]
-	mul <4 x float> %277, %26		; <<4 x float>>:278 [#uses=1]
-	mul <4 x float> %278, %27		; <<4 x float>>:279 [#uses=1]
-	mul <4 x float> %279, %28		; <<4 x float>>:280 [#uses=1]
-	mul <4 x float> %280, %29		; <<4 x float>>:281 [#uses=1]
-	mul <4 x float> %281, %30		; <<4 x float>>:282 [#uses=1]
-	mul <4 x float> %282, %31		; <<4 x float>>:283 [#uses=1]
-	mul <4 x float> %283, %32		; <<4 x float>>:284 [#uses=1]
-	mul <4 x float> %10, %10		; <<4 x float>>:285 [#uses=1]
-	mul <4 x float> %285, %11		; <<4 x float>>:286 [#uses=1]
-	mul <4 x float> %286, %12		; <<4 x float>>:287 [#uses=1]
-	mul <4 x float> %287, %13		; <<4 x float>>:288 [#uses=1]
-	mul <4 x float> %288, %14		; <<4 x float>>:289 [#uses=1]
-	mul <4 x float> %289, %15		; <<4 x float>>:290 [#uses=1]
-	mul <4 x float> %290, %16		; <<4 x float>>:291 [#uses=1]
-	mul <4 x float> %291, %17		; <<4 x float>>:292 [#uses=1]
-	mul <4 x float> %292, %18		; <<4 x float>>:293 [#uses=1]
-	mul <4 x float> %293, %19		; <<4 x float>>:294 [#uses=1]
-	mul <4 x float> %294, %20		; <<4 x float>>:295 [#uses=1]
-	mul <4 x float> %295, %21		; <<4 x float>>:296 [#uses=1]
-	mul <4 x float> %296, %22		; <<4 x float>>:297 [#uses=1]
-	mul <4 x float> %297, %23		; <<4 x float>>:298 [#uses=1]
-	mul <4 x float> %298, %24		; <<4 x float>>:299 [#uses=1]
-	mul <4 x float> %299, %25		; <<4 x float>>:300 [#uses=1]
-	mul <4 x float> %300, %26		; <<4 x float>>:301 [#uses=1]
-	mul <4 x float> %301, %27		; <<4 x float>>:302 [#uses=1]
-	mul <4 x float> %302, %28		; <<4 x float>>:303 [#uses=1]
-	mul <4 x float> %303, %29		; <<4 x float>>:304 [#uses=1]
-	mul <4 x float> %304, %30		; <<4 x float>>:305 [#uses=1]
-	mul <4 x float> %305, %31		; <<4 x float>>:306 [#uses=1]
-	mul <4 x float> %306, %32		; <<4 x float>>:307 [#uses=1]
-	mul <4 x float> %11, %11		; <<4 x float>>:308 [#uses=1]
-	mul <4 x float> %308, %12		; <<4 x float>>:309 [#uses=1]
-	mul <4 x float> %309, %13		; <<4 x float>>:310 [#uses=1]
-	mul <4 x float> %310, %14		; <<4 x float>>:311 [#uses=1]
-	mul <4 x float> %311, %15		; <<4 x float>>:312 [#uses=1]
-	mul <4 x float> %312, %16		; <<4 x float>>:313 [#uses=1]
-	mul <4 x float> %313, %17		; <<4 x float>>:314 [#uses=1]
-	mul <4 x float> %314, %18		; <<4 x float>>:315 [#uses=1]
-	mul <4 x float> %315, %19		; <<4 x float>>:316 [#uses=1]
-	mul <4 x float> %316, %20		; <<4 x float>>:317 [#uses=1]
-	mul <4 x float> %317, %21		; <<4 x float>>:318 [#uses=1]
-	mul <4 x float> %318, %22		; <<4 x float>>:319 [#uses=1]
-	mul <4 x float> %319, %23		; <<4 x float>>:320 [#uses=1]
-	mul <4 x float> %320, %24		; <<4 x float>>:321 [#uses=1]
-	mul <4 x float> %321, %25		; <<4 x float>>:322 [#uses=1]
-	mul <4 x float> %322, %26		; <<4 x float>>:323 [#uses=1]
-	mul <4 x float> %323, %27		; <<4 x float>>:324 [#uses=1]
-	mul <4 x float> %324, %28		; <<4 x float>>:325 [#uses=1]
-	mul <4 x float> %325, %29		; <<4 x float>>:326 [#uses=1]
-	mul <4 x float> %326, %30		; <<4 x float>>:327 [#uses=1]
-	mul <4 x float> %327, %31		; <<4 x float>>:328 [#uses=1]
-	mul <4 x float> %328, %32		; <<4 x float>>:329 [#uses=1]
-	mul <4 x float> %12, %12		; <<4 x float>>:330 [#uses=1]
-	mul <4 x float> %330, %13		; <<4 x float>>:331 [#uses=1]
-	mul <4 x float> %331, %14		; <<4 x float>>:332 [#uses=1]
-	mul <4 x float> %332, %15		; <<4 x float>>:333 [#uses=1]
-	mul <4 x float> %333, %16		; <<4 x float>>:334 [#uses=1]
-	mul <4 x float> %334, %17		; <<4 x float>>:335 [#uses=1]
-	mul <4 x float> %335, %18		; <<4 x float>>:336 [#uses=1]
-	mul <4 x float> %336, %19		; <<4 x float>>:337 [#uses=1]
-	mul <4 x float> %337, %20		; <<4 x float>>:338 [#uses=1]
-	mul <4 x float> %338, %21		; <<4 x float>>:339 [#uses=1]
-	mul <4 x float> %339, %22		; <<4 x float>>:340 [#uses=1]
-	mul <4 x float> %340, %23		; <<4 x float>>:341 [#uses=1]
-	mul <4 x float> %341, %24		; <<4 x float>>:342 [#uses=1]
-	mul <4 x float> %342, %25		; <<4 x float>>:343 [#uses=1]
-	mul <4 x float> %343, %26		; <<4 x float>>:344 [#uses=1]
-	mul <4 x float> %344, %27		; <<4 x float>>:345 [#uses=1]
-	mul <4 x float> %345, %28		; <<4 x float>>:346 [#uses=1]
-	mul <4 x float> %346, %29		; <<4 x float>>:347 [#uses=1]
-	mul <4 x float> %347, %30		; <<4 x float>>:348 [#uses=1]
-	mul <4 x float> %348, %31		; <<4 x float>>:349 [#uses=1]
-	mul <4 x float> %349, %32		; <<4 x float>>:350 [#uses=1]
-	mul <4 x float> %13, %13		; <<4 x float>>:351 [#uses=1]
-	mul <4 x float> %351, %14		; <<4 x float>>:352 [#uses=1]
-	mul <4 x float> %352, %15		; <<4 x float>>:353 [#uses=1]
-	mul <4 x float> %353, %16		; <<4 x float>>:354 [#uses=1]
-	mul <4 x float> %354, %17		; <<4 x float>>:355 [#uses=1]
-	mul <4 x float> %355, %18		; <<4 x float>>:356 [#uses=1]
-	mul <4 x float> %356, %19		; <<4 x float>>:357 [#uses=1]
-	mul <4 x float> %357, %20		; <<4 x float>>:358 [#uses=1]
-	mul <4 x float> %358, %21		; <<4 x float>>:359 [#uses=1]
-	mul <4 x float> %359, %22		; <<4 x float>>:360 [#uses=1]
-	mul <4 x float> %360, %23		; <<4 x float>>:361 [#uses=1]
-	mul <4 x float> %361, %24		; <<4 x float>>:362 [#uses=1]
-	mul <4 x float> %362, %25		; <<4 x float>>:363 [#uses=1]
-	mul <4 x float> %363, %26		; <<4 x float>>:364 [#uses=1]
-	mul <4 x float> %364, %27		; <<4 x float>>:365 [#uses=1]
-	mul <4 x float> %365, %28		; <<4 x float>>:366 [#uses=1]
-	mul <4 x float> %366, %29		; <<4 x float>>:367 [#uses=1]
-	mul <4 x float> %367, %30		; <<4 x float>>:368 [#uses=1]
-	mul <4 x float> %368, %31		; <<4 x float>>:369 [#uses=1]
-	mul <4 x float> %369, %32		; <<4 x float>>:370 [#uses=1]
-	mul <4 x float> %14, %14		; <<4 x float>>:371 [#uses=1]
-	mul <4 x float> %371, %15		; <<4 x float>>:372 [#uses=1]
-	mul <4 x float> %372, %16		; <<4 x float>>:373 [#uses=1]
-	mul <4 x float> %373, %17		; <<4 x float>>:374 [#uses=1]
-	mul <4 x float> %374, %18		; <<4 x float>>:375 [#uses=1]
-	mul <4 x float> %375, %19		; <<4 x float>>:376 [#uses=1]
-	mul <4 x float> %376, %20		; <<4 x float>>:377 [#uses=1]
-	mul <4 x float> %377, %21		; <<4 x float>>:378 [#uses=1]
-	mul <4 x float> %378, %22		; <<4 x float>>:379 [#uses=1]
-	mul <4 x float> %379, %23		; <<4 x float>>:380 [#uses=1]
-	mul <4 x float> %380, %24		; <<4 x float>>:381 [#uses=1]
-	mul <4 x float> %381, %25		; <<4 x float>>:382 [#uses=1]
-	mul <4 x float> %382, %26		; <<4 x float>>:383 [#uses=1]
-	mul <4 x float> %383, %27		; <<4 x float>>:384 [#uses=1]
-	mul <4 x float> %384, %28		; <<4 x float>>:385 [#uses=1]
-	mul <4 x float> %385, %29		; <<4 x float>>:386 [#uses=1]
-	mul <4 x float> %386, %30		; <<4 x float>>:387 [#uses=1]
-	mul <4 x float> %387, %31		; <<4 x float>>:388 [#uses=1]
-	mul <4 x float> %388, %32		; <<4 x float>>:389 [#uses=1]
-	mul <4 x float> %15, %15		; <<4 x float>>:390 [#uses=1]
-	mul <4 x float> %390, %16		; <<4 x float>>:391 [#uses=1]
-	mul <4 x float> %391, %17		; <<4 x float>>:392 [#uses=1]
-	mul <4 x float> %392, %18		; <<4 x float>>:393 [#uses=1]
-	mul <4 x float> %393, %19		; <<4 x float>>:394 [#uses=1]
-	mul <4 x float> %394, %20		; <<4 x float>>:395 [#uses=1]
-	mul <4 x float> %395, %21		; <<4 x float>>:396 [#uses=1]
-	mul <4 x float> %396, %22		; <<4 x float>>:397 [#uses=1]
-	mul <4 x float> %397, %23		; <<4 x float>>:398 [#uses=1]
-	mul <4 x float> %398, %24		; <<4 x float>>:399 [#uses=1]
-	mul <4 x float> %399, %25		; <<4 x float>>:400 [#uses=1]
-	mul <4 x float> %400, %26		; <<4 x float>>:401 [#uses=1]
-	mul <4 x float> %401, %27		; <<4 x float>>:402 [#uses=1]
-	mul <4 x float> %402, %28		; <<4 x float>>:403 [#uses=1]
-	mul <4 x float> %403, %29		; <<4 x float>>:404 [#uses=1]
-	mul <4 x float> %404, %30		; <<4 x float>>:405 [#uses=1]
-	mul <4 x float> %405, %31		; <<4 x float>>:406 [#uses=1]
-	mul <4 x float> %406, %32		; <<4 x float>>:407 [#uses=1]
-	mul <4 x float> %16, %16		; <<4 x float>>:408 [#uses=1]
-	mul <4 x float> %408, %17		; <<4 x float>>:409 [#uses=1]
-	mul <4 x float> %409, %18		; <<4 x float>>:410 [#uses=1]
-	mul <4 x float> %410, %19		; <<4 x float>>:411 [#uses=1]
-	mul <4 x float> %411, %20		; <<4 x float>>:412 [#uses=1]
-	mul <4 x float> %412, %21		; <<4 x float>>:413 [#uses=1]
-	mul <4 x float> %413, %22		; <<4 x float>>:414 [#uses=1]
-	mul <4 x float> %414, %23		; <<4 x float>>:415 [#uses=1]
-	mul <4 x float> %415, %24		; <<4 x float>>:416 [#uses=1]
-	mul <4 x float> %416, %25		; <<4 x float>>:417 [#uses=1]
-	mul <4 x float> %417, %26		; <<4 x float>>:418 [#uses=1]
-	mul <4 x float> %418, %27		; <<4 x float>>:419 [#uses=1]
-	mul <4 x float> %419, %28		; <<4 x float>>:420 [#uses=1]
-	mul <4 x float> %420, %29		; <<4 x float>>:421 [#uses=1]
-	mul <4 x float> %421, %30		; <<4 x float>>:422 [#uses=1]
-	mul <4 x float> %422, %31		; <<4 x float>>:423 [#uses=1]
-	mul <4 x float> %423, %32		; <<4 x float>>:424 [#uses=1]
-	mul <4 x float> %17, %17		; <<4 x float>>:425 [#uses=1]
-	mul <4 x float> %425, %18		; <<4 x float>>:426 [#uses=1]
-	mul <4 x float> %426, %19		; <<4 x float>>:427 [#uses=1]
-	mul <4 x float> %427, %20		; <<4 x float>>:428 [#uses=1]
-	mul <4 x float> %428, %21		; <<4 x float>>:429 [#uses=1]
-	mul <4 x float> %429, %22		; <<4 x float>>:430 [#uses=1]
-	mul <4 x float> %430, %23		; <<4 x float>>:431 [#uses=1]
-	mul <4 x float> %431, %24		; <<4 x float>>:432 [#uses=1]
-	mul <4 x float> %432, %25		; <<4 x float>>:433 [#uses=1]
-	mul <4 x float> %433, %26		; <<4 x float>>:434 [#uses=1]
-	mul <4 x float> %434, %27		; <<4 x float>>:435 [#uses=1]
-	mul <4 x float> %435, %28		; <<4 x float>>:436 [#uses=1]
-	mul <4 x float> %436, %29		; <<4 x float>>:437 [#uses=1]
-	mul <4 x float> %437, %30		; <<4 x float>>:438 [#uses=1]
-	mul <4 x float> %438, %31		; <<4 x float>>:439 [#uses=1]
-	mul <4 x float> %439, %32		; <<4 x float>>:440 [#uses=1]
-	mul <4 x float> %18, %18		; <<4 x float>>:441 [#uses=1]
-	mul <4 x float> %441, %19		; <<4 x float>>:442 [#uses=1]
-	mul <4 x float> %442, %20		; <<4 x float>>:443 [#uses=1]
-	mul <4 x float> %443, %21		; <<4 x float>>:444 [#uses=1]
-	mul <4 x float> %444, %22		; <<4 x float>>:445 [#uses=1]
-	mul <4 x float> %445, %23		; <<4 x float>>:446 [#uses=1]
-	mul <4 x float> %446, %24		; <<4 x float>>:447 [#uses=1]
-	mul <4 x float> %447, %25		; <<4 x float>>:448 [#uses=1]
-	mul <4 x float> %448, %26		; <<4 x float>>:449 [#uses=1]
-	mul <4 x float> %449, %27		; <<4 x float>>:450 [#uses=1]
-	mul <4 x float> %450, %28		; <<4 x float>>:451 [#uses=1]
-	mul <4 x float> %451, %29		; <<4 x float>>:452 [#uses=1]
-	mul <4 x float> %452, %30		; <<4 x float>>:453 [#uses=1]
-	mul <4 x float> %453, %31		; <<4 x float>>:454 [#uses=1]
-	mul <4 x float> %454, %32		; <<4 x float>>:455 [#uses=1]
-	mul <4 x float> %19, %19		; <<4 x float>>:456 [#uses=1]
-	mul <4 x float> %456, %20		; <<4 x float>>:457 [#uses=1]
-	mul <4 x float> %457, %21		; <<4 x float>>:458 [#uses=1]
-	mul <4 x float> %458, %22		; <<4 x float>>:459 [#uses=1]
-	mul <4 x float> %459, %23		; <<4 x float>>:460 [#uses=1]
-	mul <4 x float> %460, %24		; <<4 x float>>:461 [#uses=1]
-	mul <4 x float> %461, %25		; <<4 x float>>:462 [#uses=1]
-	mul <4 x float> %462, %26		; <<4 x float>>:463 [#uses=1]
-	mul <4 x float> %463, %27		; <<4 x float>>:464 [#uses=1]
-	mul <4 x float> %464, %28		; <<4 x float>>:465 [#uses=1]
-	mul <4 x float> %465, %29		; <<4 x float>>:466 [#uses=1]
-	mul <4 x float> %466, %30		; <<4 x float>>:467 [#uses=1]
-	mul <4 x float> %467, %31		; <<4 x float>>:468 [#uses=1]
-	mul <4 x float> %468, %32		; <<4 x float>>:469 [#uses=1]
-	mul <4 x float> %20, %20		; <<4 x float>>:470 [#uses=1]
-	mul <4 x float> %470, %21		; <<4 x float>>:471 [#uses=1]
-	mul <4 x float> %471, %22		; <<4 x float>>:472 [#uses=1]
-	mul <4 x float> %472, %23		; <<4 x float>>:473 [#uses=1]
-	mul <4 x float> %473, %24		; <<4 x float>>:474 [#uses=1]
-	mul <4 x float> %474, %25		; <<4 x float>>:475 [#uses=1]
-	mul <4 x float> %475, %26		; <<4 x float>>:476 [#uses=1]
-	mul <4 x float> %476, %27		; <<4 x float>>:477 [#uses=1]
-	mul <4 x float> %477, %28		; <<4 x float>>:478 [#uses=1]
-	mul <4 x float> %478, %29		; <<4 x float>>:479 [#uses=1]
-	mul <4 x float> %479, %30		; <<4 x float>>:480 [#uses=1]
-	mul <4 x float> %480, %31		; <<4 x float>>:481 [#uses=1]
-	mul <4 x float> %481, %32		; <<4 x float>>:482 [#uses=1]
-	mul <4 x float> %21, %21		; <<4 x float>>:483 [#uses=1]
-	mul <4 x float> %483, %22		; <<4 x float>>:484 [#uses=1]
-	mul <4 x float> %484, %23		; <<4 x float>>:485 [#uses=1]
-	mul <4 x float> %485, %24		; <<4 x float>>:486 [#uses=1]
-	mul <4 x float> %486, %25		; <<4 x float>>:487 [#uses=1]
-	mul <4 x float> %487, %26		; <<4 x float>>:488 [#uses=1]
-	mul <4 x float> %488, %27		; <<4 x float>>:489 [#uses=1]
-	mul <4 x float> %489, %28		; <<4 x float>>:490 [#uses=1]
-	mul <4 x float> %490, %29		; <<4 x float>>:491 [#uses=1]
-	mul <4 x float> %491, %30		; <<4 x float>>:492 [#uses=1]
-	mul <4 x float> %492, %31		; <<4 x float>>:493 [#uses=1]
-	mul <4 x float> %493, %32		; <<4 x float>>:494 [#uses=1]
-	mul <4 x float> %22, %22		; <<4 x float>>:495 [#uses=1]
-	mul <4 x float> %495, %23		; <<4 x float>>:496 [#uses=1]
-	mul <4 x float> %496, %24		; <<4 x float>>:497 [#uses=1]
-	mul <4 x float> %497, %25		; <<4 x float>>:498 [#uses=1]
-	mul <4 x float> %498, %26		; <<4 x float>>:499 [#uses=1]
-	mul <4 x float> %499, %27		; <<4 x float>>:500 [#uses=1]
-	mul <4 x float> %500, %28		; <<4 x float>>:501 [#uses=1]
-	mul <4 x float> %501, %29		; <<4 x float>>:502 [#uses=1]
-	mul <4 x float> %502, %30		; <<4 x float>>:503 [#uses=1]
-	mul <4 x float> %503, %31		; <<4 x float>>:504 [#uses=1]
-	mul <4 x float> %504, %32		; <<4 x float>>:505 [#uses=1]
-	mul <4 x float> %23, %23		; <<4 x float>>:506 [#uses=1]
-	mul <4 x float> %506, %24		; <<4 x float>>:507 [#uses=1]
-	mul <4 x float> %507, %25		; <<4 x float>>:508 [#uses=1]
-	mul <4 x float> %508, %26		; <<4 x float>>:509 [#uses=1]
-	mul <4 x float> %509, %27		; <<4 x float>>:510 [#uses=1]
-	mul <4 x float> %510, %28		; <<4 x float>>:511 [#uses=1]
-	mul <4 x float> %511, %29		; <<4 x float>>:512 [#uses=1]
-	mul <4 x float> %512, %30		; <<4 x float>>:513 [#uses=1]
-	mul <4 x float> %513, %31		; <<4 x float>>:514 [#uses=1]
-	mul <4 x float> %514, %32		; <<4 x float>>:515 [#uses=1]
-	mul <4 x float> %24, %24		; <<4 x float>>:516 [#uses=1]
-	mul <4 x float> %516, %25		; <<4 x float>>:517 [#uses=1]
-	mul <4 x float> %517, %26		; <<4 x float>>:518 [#uses=1]
-	mul <4 x float> %518, %27		; <<4 x float>>:519 [#uses=1]
-	mul <4 x float> %519, %28		; <<4 x float>>:520 [#uses=1]
-	mul <4 x float> %520, %29		; <<4 x float>>:521 [#uses=1]
-	mul <4 x float> %521, %30		; <<4 x float>>:522 [#uses=1]
-	mul <4 x float> %522, %31		; <<4 x float>>:523 [#uses=1]
-	mul <4 x float> %523, %32		; <<4 x float>>:524 [#uses=1]
-	mul <4 x float> %25, %25		; <<4 x float>>:525 [#uses=1]
-	mul <4 x float> %525, %26		; <<4 x float>>:526 [#uses=1]
-	mul <4 x float> %526, %27		; <<4 x float>>:527 [#uses=1]
-	mul <4 x float> %527, %28		; <<4 x float>>:528 [#uses=1]
-	mul <4 x float> %528, %29		; <<4 x float>>:529 [#uses=1]
-	mul <4 x float> %529, %30		; <<4 x float>>:530 [#uses=1]
-	mul <4 x float> %530, %31		; <<4 x float>>:531 [#uses=1]
-	mul <4 x float> %531, %32		; <<4 x float>>:532 [#uses=1]
-	mul <4 x float> %26, %26		; <<4 x float>>:533 [#uses=1]
-	mul <4 x float> %533, %27		; <<4 x float>>:534 [#uses=1]
-	mul <4 x float> %534, %28		; <<4 x float>>:535 [#uses=1]
-	mul <4 x float> %535, %29		; <<4 x float>>:536 [#uses=1]
-	mul <4 x float> %536, %30		; <<4 x float>>:537 [#uses=1]
-	mul <4 x float> %537, %31		; <<4 x float>>:538 [#uses=1]
-	mul <4 x float> %538, %32		; <<4 x float>>:539 [#uses=1]
-	mul <4 x float> %27, %27		; <<4 x float>>:540 [#uses=1]
-	mul <4 x float> %540, %28		; <<4 x float>>:541 [#uses=1]
-	mul <4 x float> %541, %29		; <<4 x float>>:542 [#uses=1]
-	mul <4 x float> %542, %30		; <<4 x float>>:543 [#uses=1]
-	mul <4 x float> %543, %31		; <<4 x float>>:544 [#uses=1]
-	mul <4 x float> %544, %32		; <<4 x float>>:545 [#uses=1]
-	mul <4 x float> %28, %28		; <<4 x float>>:546 [#uses=1]
-	mul <4 x float> %546, %29		; <<4 x float>>:547 [#uses=1]
-	mul <4 x float> %547, %30		; <<4 x float>>:548 [#uses=1]
-	mul <4 x float> %548, %31		; <<4 x float>>:549 [#uses=1]
-	mul <4 x float> %549, %32		; <<4 x float>>:550 [#uses=1]
-	mul <4 x float> %29, %29		; <<4 x float>>:551 [#uses=1]
-	mul <4 x float> %551, %30		; <<4 x float>>:552 [#uses=1]
-	mul <4 x float> %552, %31		; <<4 x float>>:553 [#uses=1]
-	mul <4 x float> %553, %32		; <<4 x float>>:554 [#uses=1]
-	mul <4 x float> %30, %30		; <<4 x float>>:555 [#uses=1]
-	mul <4 x float> %555, %31		; <<4 x float>>:556 [#uses=1]
-	mul <4 x float> %556, %32		; <<4 x float>>:557 [#uses=1]
-	mul <4 x float> %31, %31		; <<4 x float>>:558 [#uses=1]
-	mul <4 x float> %558, %32		; <<4 x float>>:559 [#uses=1]
-	mul <4 x float> %32, %32		; <<4 x float>>:560 [#uses=1]
-	add <4 x float> %64, %64		; <<4 x float>>:561 [#uses=1]
-	add <4 x float> %561, %64		; <<4 x float>>:562 [#uses=1]
-	add <4 x float> %562, %95		; <<4 x float>>:563 [#uses=1]
-	add <4 x float> %563, %125		; <<4 x float>>:564 [#uses=1]
-	add <4 x float> %564, %154		; <<4 x float>>:565 [#uses=1]
-	add <4 x float> %565, %182		; <<4 x float>>:566 [#uses=1]
-	add <4 x float> %566, %209		; <<4 x float>>:567 [#uses=1]
-	add <4 x float> %567, %235		; <<4 x float>>:568 [#uses=1]
-	add <4 x float> %568, %260		; <<4 x float>>:569 [#uses=1]
-	add <4 x float> %569, %284		; <<4 x float>>:570 [#uses=1]
-	add <4 x float> %570, %307		; <<4 x float>>:571 [#uses=1]
-	add <4 x float> %571, %329		; <<4 x float>>:572 [#uses=1]
-	add <4 x float> %572, %350		; <<4 x float>>:573 [#uses=1]
-	add <4 x float> %573, %370		; <<4 x float>>:574 [#uses=1]
-	add <4 x float> %574, %389		; <<4 x float>>:575 [#uses=1]
-	add <4 x float> %575, %407		; <<4 x float>>:576 [#uses=1]
-	add <4 x float> %576, %424		; <<4 x float>>:577 [#uses=1]
-	add <4 x float> %577, %440		; <<4 x float>>:578 [#uses=1]
-	add <4 x float> %578, %455		; <<4 x float>>:579 [#uses=1]
-	add <4 x float> %579, %469		; <<4 x float>>:580 [#uses=1]
-	add <4 x float> %580, %482		; <<4 x float>>:581 [#uses=1]
-	add <4 x float> %581, %494		; <<4 x float>>:582 [#uses=1]
-	add <4 x float> %582, %505		; <<4 x float>>:583 [#uses=1]
-	add <4 x float> %583, %515		; <<4 x float>>:584 [#uses=1]
-	add <4 x float> %584, %524		; <<4 x float>>:585 [#uses=1]
-	add <4 x float> %585, %532		; <<4 x float>>:586 [#uses=1]
-	add <4 x float> %586, %539		; <<4 x float>>:587 [#uses=1]
-	add <4 x float> %587, %545		; <<4 x float>>:588 [#uses=1]
-	add <4 x float> %588, %550		; <<4 x float>>:589 [#uses=1]
-	add <4 x float> %589, %554		; <<4 x float>>:590 [#uses=1]
-	add <4 x float> %590, %557		; <<4 x float>>:591 [#uses=1]
-	add <4 x float> %591, %559		; <<4 x float>>:592 [#uses=1]
-	add <4 x float> %592, %560		; <<4 x float>>:593 [#uses=1]
+	fmul <4 x float> %1, %1		; <<4 x float>>:33 [#uses=1]
+	fmul <4 x float> %33, %2		; <<4 x float>>:34 [#uses=1]
+	fmul <4 x float> %34, %3		; <<4 x float>>:35 [#uses=1]
+	fmul <4 x float> %35, %4		; <<4 x float>>:36 [#uses=1]
+	fmul <4 x float> %36, %5		; <<4 x float>>:37 [#uses=1]
+	fmul <4 x float> %37, %6		; <<4 x float>>:38 [#uses=1]
+	fmul <4 x float> %38, %7		; <<4 x float>>:39 [#uses=1]
+	fmul <4 x float> %39, %8		; <<4 x float>>:40 [#uses=1]
+	fmul <4 x float> %40, %9		; <<4 x float>>:41 [#uses=1]
+	fmul <4 x float> %41, %10		; <<4 x float>>:42 [#uses=1]
+	fmul <4 x float> %42, %11		; <<4 x float>>:43 [#uses=1]
+	fmul <4 x float> %43, %12		; <<4 x float>>:44 [#uses=1]
+	fmul <4 x float> %44, %13		; <<4 x float>>:45 [#uses=1]
+	fmul <4 x float> %45, %14		; <<4 x float>>:46 [#uses=1]
+	fmul <4 x float> %46, %15		; <<4 x float>>:47 [#uses=1]
+	fmul <4 x float> %47, %16		; <<4 x float>>:48 [#uses=1]
+	fmul <4 x float> %48, %17		; <<4 x float>>:49 [#uses=1]
+	fmul <4 x float> %49, %18		; <<4 x float>>:50 [#uses=1]
+	fmul <4 x float> %50, %19		; <<4 x float>>:51 [#uses=1]
+	fmul <4 x float> %51, %20		; <<4 x float>>:52 [#uses=1]
+	fmul <4 x float> %52, %21		; <<4 x float>>:53 [#uses=1]
+	fmul <4 x float> %53, %22		; <<4 x float>>:54 [#uses=1]
+	fmul <4 x float> %54, %23		; <<4 x float>>:55 [#uses=1]
+	fmul <4 x float> %55, %24		; <<4 x float>>:56 [#uses=1]
+	fmul <4 x float> %56, %25		; <<4 x float>>:57 [#uses=1]
+	fmul <4 x float> %57, %26		; <<4 x float>>:58 [#uses=1]
+	fmul <4 x float> %58, %27		; <<4 x float>>:59 [#uses=1]
+	fmul <4 x float> %59, %28		; <<4 x float>>:60 [#uses=1]
+	fmul <4 x float> %60, %29		; <<4 x float>>:61 [#uses=1]
+	fmul <4 x float> %61, %30		; <<4 x float>>:62 [#uses=1]
+	fmul <4 x float> %62, %31		; <<4 x float>>:63 [#uses=1]
+	fmul <4 x float> %63, %32		; <<4 x float>>:64 [#uses=3]
+	fmul <4 x float> %2, %2		; <<4 x float>>:65 [#uses=1]
+	fmul <4 x float> %65, %3		; <<4 x float>>:66 [#uses=1]
+	fmul <4 x float> %66, %4		; <<4 x float>>:67 [#uses=1]
+	fmul <4 x float> %67, %5		; <<4 x float>>:68 [#uses=1]
+	fmul <4 x float> %68, %6		; <<4 x float>>:69 [#uses=1]
+	fmul <4 x float> %69, %7		; <<4 x float>>:70 [#uses=1]
+	fmul <4 x float> %70, %8		; <<4 x float>>:71 [#uses=1]
+	fmul <4 x float> %71, %9		; <<4 x float>>:72 [#uses=1]
+	fmul <4 x float> %72, %10		; <<4 x float>>:73 [#uses=1]
+	fmul <4 x float> %73, %11		; <<4 x float>>:74 [#uses=1]
+	fmul <4 x float> %74, %12		; <<4 x float>>:75 [#uses=1]
+	fmul <4 x float> %75, %13		; <<4 x float>>:76 [#uses=1]
+	fmul <4 x float> %76, %14		; <<4 x float>>:77 [#uses=1]
+	fmul <4 x float> %77, %15		; <<4 x float>>:78 [#uses=1]
+	fmul <4 x float> %78, %16		; <<4 x float>>:79 [#uses=1]
+	fmul <4 x float> %79, %17		; <<4 x float>>:80 [#uses=1]
+	fmul <4 x float> %80, %18		; <<4 x float>>:81 [#uses=1]
+	fmul <4 x float> %81, %19		; <<4 x float>>:82 [#uses=1]
+	fmul <4 x float> %82, %20		; <<4 x float>>:83 [#uses=1]
+	fmul <4 x float> %83, %21		; <<4 x float>>:84 [#uses=1]
+	fmul <4 x float> %84, %22		; <<4 x float>>:85 [#uses=1]
+	fmul <4 x float> %85, %23		; <<4 x float>>:86 [#uses=1]
+	fmul <4 x float> %86, %24		; <<4 x float>>:87 [#uses=1]
+	fmul <4 x float> %87, %25		; <<4 x float>>:88 [#uses=1]
+	fmul <4 x float> %88, %26		; <<4 x float>>:89 [#uses=1]
+	fmul <4 x float> %89, %27		; <<4 x float>>:90 [#uses=1]
+	fmul <4 x float> %90, %28		; <<4 x float>>:91 [#uses=1]
+	fmul <4 x float> %91, %29		; <<4 x float>>:92 [#uses=1]
+	fmul <4 x float> %92, %30		; <<4 x float>>:93 [#uses=1]
+	fmul <4 x float> %93, %31		; <<4 x float>>:94 [#uses=1]
+	fmul <4 x float> %94, %32		; <<4 x float>>:95 [#uses=1]
+	fmul <4 x float> %3, %3		; <<4 x float>>:96 [#uses=1]
+	fmul <4 x float> %96, %4		; <<4 x float>>:97 [#uses=1]
+	fmul <4 x float> %97, %5		; <<4 x float>>:98 [#uses=1]
+	fmul <4 x float> %98, %6		; <<4 x float>>:99 [#uses=1]
+	fmul <4 x float> %99, %7		; <<4 x float>>:100 [#uses=1]
+	fmul <4 x float> %100, %8		; <<4 x float>>:101 [#uses=1]
+	fmul <4 x float> %101, %9		; <<4 x float>>:102 [#uses=1]
+	fmul <4 x float> %102, %10		; <<4 x float>>:103 [#uses=1]
+	fmul <4 x float> %103, %11		; <<4 x float>>:104 [#uses=1]
+	fmul <4 x float> %104, %12		; <<4 x float>>:105 [#uses=1]
+	fmul <4 x float> %105, %13		; <<4 x float>>:106 [#uses=1]
+	fmul <4 x float> %106, %14		; <<4 x float>>:107 [#uses=1]
+	fmul <4 x float> %107, %15		; <<4 x float>>:108 [#uses=1]
+	fmul <4 x float> %108, %16		; <<4 x float>>:109 [#uses=1]
+	fmul <4 x float> %109, %17		; <<4 x float>>:110 [#uses=1]
+	fmul <4 x float> %110, %18		; <<4 x float>>:111 [#uses=1]
+	fmul <4 x float> %111, %19		; <<4 x float>>:112 [#uses=1]
+	fmul <4 x float> %112, %20		; <<4 x float>>:113 [#uses=1]
+	fmul <4 x float> %113, %21		; <<4 x float>>:114 [#uses=1]
+	fmul <4 x float> %114, %22		; <<4 x float>>:115 [#uses=1]
+	fmul <4 x float> %115, %23		; <<4 x float>>:116 [#uses=1]
+	fmul <4 x float> %116, %24		; <<4 x float>>:117 [#uses=1]
+	fmul <4 x float> %117, %25		; <<4 x float>>:118 [#uses=1]
+	fmul <4 x float> %118, %26		; <<4 x float>>:119 [#uses=1]
+	fmul <4 x float> %119, %27		; <<4 x float>>:120 [#uses=1]
+	fmul <4 x float> %120, %28		; <<4 x float>>:121 [#uses=1]
+	fmul <4 x float> %121, %29		; <<4 x float>>:122 [#uses=1]
+	fmul <4 x float> %122, %30		; <<4 x float>>:123 [#uses=1]
+	fmul <4 x float> %123, %31		; <<4 x float>>:124 [#uses=1]
+	fmul <4 x float> %124, %32		; <<4 x float>>:125 [#uses=1]
+	fmul <4 x float> %4, %4		; <<4 x float>>:126 [#uses=1]
+	fmul <4 x float> %126, %5		; <<4 x float>>:127 [#uses=1]
+	fmul <4 x float> %127, %6		; <<4 x float>>:128 [#uses=1]
+	fmul <4 x float> %128, %7		; <<4 x float>>:129 [#uses=1]
+	fmul <4 x float> %129, %8		; <<4 x float>>:130 [#uses=1]
+	fmul <4 x float> %130, %9		; <<4 x float>>:131 [#uses=1]
+	fmul <4 x float> %131, %10		; <<4 x float>>:132 [#uses=1]
+	fmul <4 x float> %132, %11		; <<4 x float>>:133 [#uses=1]
+	fmul <4 x float> %133, %12		; <<4 x float>>:134 [#uses=1]
+	fmul <4 x float> %134, %13		; <<4 x float>>:135 [#uses=1]
+	fmul <4 x float> %135, %14		; <<4 x float>>:136 [#uses=1]
+	fmul <4 x float> %136, %15		; <<4 x float>>:137 [#uses=1]
+	fmul <4 x float> %137, %16		; <<4 x float>>:138 [#uses=1]
+	fmul <4 x float> %138, %17		; <<4 x float>>:139 [#uses=1]
+	fmul <4 x float> %139, %18		; <<4 x float>>:140 [#uses=1]
+	fmul <4 x float> %140, %19		; <<4 x float>>:141 [#uses=1]
+	fmul <4 x float> %141, %20		; <<4 x float>>:142 [#uses=1]
+	fmul <4 x float> %142, %21		; <<4 x float>>:143 [#uses=1]
+	fmul <4 x float> %143, %22		; <<4 x float>>:144 [#uses=1]
+	fmul <4 x float> %144, %23		; <<4 x float>>:145 [#uses=1]
+	fmul <4 x float> %145, %24		; <<4 x float>>:146 [#uses=1]
+	fmul <4 x float> %146, %25		; <<4 x float>>:147 [#uses=1]
+	fmul <4 x float> %147, %26		; <<4 x float>>:148 [#uses=1]
+	fmul <4 x float> %148, %27		; <<4 x float>>:149 [#uses=1]
+	fmul <4 x float> %149, %28		; <<4 x float>>:150 [#uses=1]
+	fmul <4 x float> %150, %29		; <<4 x float>>:151 [#uses=1]
+	fmul <4 x float> %151, %30		; <<4 x float>>:152 [#uses=1]
+	fmul <4 x float> %152, %31		; <<4 x float>>:153 [#uses=1]
+	fmul <4 x float> %153, %32		; <<4 x float>>:154 [#uses=1]
+	fmul <4 x float> %5, %5		; <<4 x float>>:155 [#uses=1]
+	fmul <4 x float> %155, %6		; <<4 x float>>:156 [#uses=1]
+	fmul <4 x float> %156, %7		; <<4 x float>>:157 [#uses=1]
+	fmul <4 x float> %157, %8		; <<4 x float>>:158 [#uses=1]
+	fmul <4 x float> %158, %9		; <<4 x float>>:159 [#uses=1]
+	fmul <4 x float> %159, %10		; <<4 x float>>:160 [#uses=1]
+	fmul <4 x float> %160, %11		; <<4 x float>>:161 [#uses=1]
+	fmul <4 x float> %161, %12		; <<4 x float>>:162 [#uses=1]
+	fmul <4 x float> %162, %13		; <<4 x float>>:163 [#uses=1]
+	fmul <4 x float> %163, %14		; <<4 x float>>:164 [#uses=1]
+	fmul <4 x float> %164, %15		; <<4 x float>>:165 [#uses=1]
+	fmul <4 x float> %165, %16		; <<4 x float>>:166 [#uses=1]
+	fmul <4 x float> %166, %17		; <<4 x float>>:167 [#uses=1]
+	fmul <4 x float> %167, %18		; <<4 x float>>:168 [#uses=1]
+	fmul <4 x float> %168, %19		; <<4 x float>>:169 [#uses=1]
+	fmul <4 x float> %169, %20		; <<4 x float>>:170 [#uses=1]
+	fmul <4 x float> %170, %21		; <<4 x float>>:171 [#uses=1]
+	fmul <4 x float> %171, %22		; <<4 x float>>:172 [#uses=1]
+	fmul <4 x float> %172, %23		; <<4 x float>>:173 [#uses=1]
+	fmul <4 x float> %173, %24		; <<4 x float>>:174 [#uses=1]
+	fmul <4 x float> %174, %25		; <<4 x float>>:175 [#uses=1]
+	fmul <4 x float> %175, %26		; <<4 x float>>:176 [#uses=1]
+	fmul <4 x float> %176, %27		; <<4 x float>>:177 [#uses=1]
+	fmul <4 x float> %177, %28		; <<4 x float>>:178 [#uses=1]
+	fmul <4 x float> %178, %29		; <<4 x float>>:179 [#uses=1]
+	fmul <4 x float> %179, %30		; <<4 x float>>:180 [#uses=1]
+	fmul <4 x float> %180, %31		; <<4 x float>>:181 [#uses=1]
+	fmul <4 x float> %181, %32		; <<4 x float>>:182 [#uses=1]
+	fmul <4 x float> %6, %6		; <<4 x float>>:183 [#uses=1]
+	fmul <4 x float> %183, %7		; <<4 x float>>:184 [#uses=1]
+	fmul <4 x float> %184, %8		; <<4 x float>>:185 [#uses=1]
+	fmul <4 x float> %185, %9		; <<4 x float>>:186 [#uses=1]
+	fmul <4 x float> %186, %10		; <<4 x float>>:187 [#uses=1]
+	fmul <4 x float> %187, %11		; <<4 x float>>:188 [#uses=1]
+	fmul <4 x float> %188, %12		; <<4 x float>>:189 [#uses=1]
+	fmul <4 x float> %189, %13		; <<4 x float>>:190 [#uses=1]
+	fmul <4 x float> %190, %14		; <<4 x float>>:191 [#uses=1]
+	fmul <4 x float> %191, %15		; <<4 x float>>:192 [#uses=1]
+	fmul <4 x float> %192, %16		; <<4 x float>>:193 [#uses=1]
+	fmul <4 x float> %193, %17		; <<4 x float>>:194 [#uses=1]
+	fmul <4 x float> %194, %18		; <<4 x float>>:195 [#uses=1]
+	fmul <4 x float> %195, %19		; <<4 x float>>:196 [#uses=1]
+	fmul <4 x float> %196, %20		; <<4 x float>>:197 [#uses=1]
+	fmul <4 x float> %197, %21		; <<4 x float>>:198 [#uses=1]
+	fmul <4 x float> %198, %22		; <<4 x float>>:199 [#uses=1]
+	fmul <4 x float> %199, %23		; <<4 x float>>:200 [#uses=1]
+	fmul <4 x float> %200, %24		; <<4 x float>>:201 [#uses=1]
+	fmul <4 x float> %201, %25		; <<4 x float>>:202 [#uses=1]
+	fmul <4 x float> %202, %26		; <<4 x float>>:203 [#uses=1]
+	fmul <4 x float> %203, %27		; <<4 x float>>:204 [#uses=1]
+	fmul <4 x float> %204, %28		; <<4 x float>>:205 [#uses=1]
+	fmul <4 x float> %205, %29		; <<4 x float>>:206 [#uses=1]
+	fmul <4 x float> %206, %30		; <<4 x float>>:207 [#uses=1]
+	fmul <4 x float> %207, %31		; <<4 x float>>:208 [#uses=1]
+	fmul <4 x float> %208, %32		; <<4 x float>>:209 [#uses=1]
+	fmul <4 x float> %7, %7		; <<4 x float>>:210 [#uses=1]
+	fmul <4 x float> %210, %8		; <<4 x float>>:211 [#uses=1]
+	fmul <4 x float> %211, %9		; <<4 x float>>:212 [#uses=1]
+	fmul <4 x float> %212, %10		; <<4 x float>>:213 [#uses=1]
+	fmul <4 x float> %213, %11		; <<4 x float>>:214 [#uses=1]
+	fmul <4 x float> %214, %12		; <<4 x float>>:215 [#uses=1]
+	fmul <4 x float> %215, %13		; <<4 x float>>:216 [#uses=1]
+	fmul <4 x float> %216, %14		; <<4 x float>>:217 [#uses=1]
+	fmul <4 x float> %217, %15		; <<4 x float>>:218 [#uses=1]
+	fmul <4 x float> %218, %16		; <<4 x float>>:219 [#uses=1]
+	fmul <4 x float> %219, %17		; <<4 x float>>:220 [#uses=1]
+	fmul <4 x float> %220, %18		; <<4 x float>>:221 [#uses=1]
+	fmul <4 x float> %221, %19		; <<4 x float>>:222 [#uses=1]
+	fmul <4 x float> %222, %20		; <<4 x float>>:223 [#uses=1]
+	fmul <4 x float> %223, %21		; <<4 x float>>:224 [#uses=1]
+	fmul <4 x float> %224, %22		; <<4 x float>>:225 [#uses=1]
+	fmul <4 x float> %225, %23		; <<4 x float>>:226 [#uses=1]
+	fmul <4 x float> %226, %24		; <<4 x float>>:227 [#uses=1]
+	fmul <4 x float> %227, %25		; <<4 x float>>:228 [#uses=1]
+	fmul <4 x float> %228, %26		; <<4 x float>>:229 [#uses=1]
+	fmul <4 x float> %229, %27		; <<4 x float>>:230 [#uses=1]
+	fmul <4 x float> %230, %28		; <<4 x float>>:231 [#uses=1]
+	fmul <4 x float> %231, %29		; <<4 x float>>:232 [#uses=1]
+	fmul <4 x float> %232, %30		; <<4 x float>>:233 [#uses=1]
+	fmul <4 x float> %233, %31		; <<4 x float>>:234 [#uses=1]
+	fmul <4 x float> %234, %32		; <<4 x float>>:235 [#uses=1]
+	fmul <4 x float> %8, %8		; <<4 x float>>:236 [#uses=1]
+	fmul <4 x float> %236, %9		; <<4 x float>>:237 [#uses=1]
+	fmul <4 x float> %237, %10		; <<4 x float>>:238 [#uses=1]
+	fmul <4 x float> %238, %11		; <<4 x float>>:239 [#uses=1]
+	fmul <4 x float> %239, %12		; <<4 x float>>:240 [#uses=1]
+	fmul <4 x float> %240, %13		; <<4 x float>>:241 [#uses=1]
+	fmul <4 x float> %241, %14		; <<4 x float>>:242 [#uses=1]
+	fmul <4 x float> %242, %15		; <<4 x float>>:243 [#uses=1]
+	fmul <4 x float> %243, %16		; <<4 x float>>:244 [#uses=1]
+	fmul <4 x float> %244, %17		; <<4 x float>>:245 [#uses=1]
+	fmul <4 x float> %245, %18		; <<4 x float>>:246 [#uses=1]
+	fmul <4 x float> %246, %19		; <<4 x float>>:247 [#uses=1]
+	fmul <4 x float> %247, %20		; <<4 x float>>:248 [#uses=1]
+	fmul <4 x float> %248, %21		; <<4 x float>>:249 [#uses=1]
+	fmul <4 x float> %249, %22		; <<4 x float>>:250 [#uses=1]
+	fmul <4 x float> %250, %23		; <<4 x float>>:251 [#uses=1]
+	fmul <4 x float> %251, %24		; <<4 x float>>:252 [#uses=1]
+	fmul <4 x float> %252, %25		; <<4 x float>>:253 [#uses=1]
+	fmul <4 x float> %253, %26		; <<4 x float>>:254 [#uses=1]
+	fmul <4 x float> %254, %27		; <<4 x float>>:255 [#uses=1]
+	fmul <4 x float> %255, %28		; <<4 x float>>:256 [#uses=1]
+	fmul <4 x float> %256, %29		; <<4 x float>>:257 [#uses=1]
+	fmul <4 x float> %257, %30		; <<4 x float>>:258 [#uses=1]
+	fmul <4 x float> %258, %31		; <<4 x float>>:259 [#uses=1]
+	fmul <4 x float> %259, %32		; <<4 x float>>:260 [#uses=1]
+	fmul <4 x float> %9, %9		; <<4 x float>>:261 [#uses=1]
+	fmul <4 x float> %261, %10		; <<4 x float>>:262 [#uses=1]
+	fmul <4 x float> %262, %11		; <<4 x float>>:263 [#uses=1]
+	fmul <4 x float> %263, %12		; <<4 x float>>:264 [#uses=1]
+	fmul <4 x float> %264, %13		; <<4 x float>>:265 [#uses=1]
+	fmul <4 x float> %265, %14		; <<4 x float>>:266 [#uses=1]
+	fmul <4 x float> %266, %15		; <<4 x float>>:267 [#uses=1]
+	fmul <4 x float> %267, %16		; <<4 x float>>:268 [#uses=1]
+	fmul <4 x float> %268, %17		; <<4 x float>>:269 [#uses=1]
+	fmul <4 x float> %269, %18		; <<4 x float>>:270 [#uses=1]
+	fmul <4 x float> %270, %19		; <<4 x float>>:271 [#uses=1]
+	fmul <4 x float> %271, %20		; <<4 x float>>:272 [#uses=1]
+	fmul <4 x float> %272, %21		; <<4 x float>>:273 [#uses=1]
+	fmul <4 x float> %273, %22		; <<4 x float>>:274 [#uses=1]
+	fmul <4 x float> %274, %23		; <<4 x float>>:275 [#uses=1]
+	fmul <4 x float> %275, %24		; <<4 x float>>:276 [#uses=1]
+	fmul <4 x float> %276, %25		; <<4 x float>>:277 [#uses=1]
+	fmul <4 x float> %277, %26		; <<4 x float>>:278 [#uses=1]
+	fmul <4 x float> %278, %27		; <<4 x float>>:279 [#uses=1]
+	fmul <4 x float> %279, %28		; <<4 x float>>:280 [#uses=1]
+	fmul <4 x float> %280, %29		; <<4 x float>>:281 [#uses=1]
+	fmul <4 x float> %281, %30		; <<4 x float>>:282 [#uses=1]
+	fmul <4 x float> %282, %31		; <<4 x float>>:283 [#uses=1]
+	fmul <4 x float> %283, %32		; <<4 x float>>:284 [#uses=1]
+	fmul <4 x float> %10, %10		; <<4 x float>>:285 [#uses=1]
+	fmul <4 x float> %285, %11		; <<4 x float>>:286 [#uses=1]
+	fmul <4 x float> %286, %12		; <<4 x float>>:287 [#uses=1]
+	fmul <4 x float> %287, %13		; <<4 x float>>:288 [#uses=1]
+	fmul <4 x float> %288, %14		; <<4 x float>>:289 [#uses=1]
+	fmul <4 x float> %289, %15		; <<4 x float>>:290 [#uses=1]
+	fmul <4 x float> %290, %16		; <<4 x float>>:291 [#uses=1]
+	fmul <4 x float> %291, %17		; <<4 x float>>:292 [#uses=1]
+	fmul <4 x float> %292, %18		; <<4 x float>>:293 [#uses=1]
+	fmul <4 x float> %293, %19		; <<4 x float>>:294 [#uses=1]
+	fmul <4 x float> %294, %20		; <<4 x float>>:295 [#uses=1]
+	fmul <4 x float> %295, %21		; <<4 x float>>:296 [#uses=1]
+	fmul <4 x float> %296, %22		; <<4 x float>>:297 [#uses=1]
+	fmul <4 x float> %297, %23		; <<4 x float>>:298 [#uses=1]
+	fmul <4 x float> %298, %24		; <<4 x float>>:299 [#uses=1]
+	fmul <4 x float> %299, %25		; <<4 x float>>:300 [#uses=1]
+	fmul <4 x float> %300, %26		; <<4 x float>>:301 [#uses=1]
+	fmul <4 x float> %301, %27		; <<4 x float>>:302 [#uses=1]
+	fmul <4 x float> %302, %28		; <<4 x float>>:303 [#uses=1]
+	fmul <4 x float> %303, %29		; <<4 x float>>:304 [#uses=1]
+	fmul <4 x float> %304, %30		; <<4 x float>>:305 [#uses=1]
+	fmul <4 x float> %305, %31		; <<4 x float>>:306 [#uses=1]
+	fmul <4 x float> %306, %32		; <<4 x float>>:307 [#uses=1]
+	fmul <4 x float> %11, %11		; <<4 x float>>:308 [#uses=1]
+	fmul <4 x float> %308, %12		; <<4 x float>>:309 [#uses=1]
+	fmul <4 x float> %309, %13		; <<4 x float>>:310 [#uses=1]
+	fmul <4 x float> %310, %14		; <<4 x float>>:311 [#uses=1]
+	fmul <4 x float> %311, %15		; <<4 x float>>:312 [#uses=1]
+	fmul <4 x float> %312, %16		; <<4 x float>>:313 [#uses=1]
+	fmul <4 x float> %313, %17		; <<4 x float>>:314 [#uses=1]
+	fmul <4 x float> %314, %18		; <<4 x float>>:315 [#uses=1]
+	fmul <4 x float> %315, %19		; <<4 x float>>:316 [#uses=1]
+	fmul <4 x float> %316, %20		; <<4 x float>>:317 [#uses=1]
+	fmul <4 x float> %317, %21		; <<4 x float>>:318 [#uses=1]
+	fmul <4 x float> %318, %22		; <<4 x float>>:319 [#uses=1]
+	fmul <4 x float> %319, %23		; <<4 x float>>:320 [#uses=1]
+	fmul <4 x float> %320, %24		; <<4 x float>>:321 [#uses=1]
+	fmul <4 x float> %321, %25		; <<4 x float>>:322 [#uses=1]
+	fmul <4 x float> %322, %26		; <<4 x float>>:323 [#uses=1]
+	fmul <4 x float> %323, %27		; <<4 x float>>:324 [#uses=1]
+	fmul <4 x float> %324, %28		; <<4 x float>>:325 [#uses=1]
+	fmul <4 x float> %325, %29		; <<4 x float>>:326 [#uses=1]
+	fmul <4 x float> %326, %30		; <<4 x float>>:327 [#uses=1]
+	fmul <4 x float> %327, %31		; <<4 x float>>:328 [#uses=1]
+	fmul <4 x float> %328, %32		; <<4 x float>>:329 [#uses=1]
+	fmul <4 x float> %12, %12		; <<4 x float>>:330 [#uses=1]
+	fmul <4 x float> %330, %13		; <<4 x float>>:331 [#uses=1]
+	fmul <4 x float> %331, %14		; <<4 x float>>:332 [#uses=1]
+	fmul <4 x float> %332, %15		; <<4 x float>>:333 [#uses=1]
+	fmul <4 x float> %333, %16		; <<4 x float>>:334 [#uses=1]
+	fmul <4 x float> %334, %17		; <<4 x float>>:335 [#uses=1]
+	fmul <4 x float> %335, %18		; <<4 x float>>:336 [#uses=1]
+	fmul <4 x float> %336, %19		; <<4 x float>>:337 [#uses=1]
+	fmul <4 x float> %337, %20		; <<4 x float>>:338 [#uses=1]
+	fmul <4 x float> %338, %21		; <<4 x float>>:339 [#uses=1]
+	fmul <4 x float> %339, %22		; <<4 x float>>:340 [#uses=1]
+	fmul <4 x float> %340, %23		; <<4 x float>>:341 [#uses=1]
+	fmul <4 x float> %341, %24		; <<4 x float>>:342 [#uses=1]
+	fmul <4 x float> %342, %25		; <<4 x float>>:343 [#uses=1]
+	fmul <4 x float> %343, %26		; <<4 x float>>:344 [#uses=1]
+	fmul <4 x float> %344, %27		; <<4 x float>>:345 [#uses=1]
+	fmul <4 x float> %345, %28		; <<4 x float>>:346 [#uses=1]
+	fmul <4 x float> %346, %29		; <<4 x float>>:347 [#uses=1]
+	fmul <4 x float> %347, %30		; <<4 x float>>:348 [#uses=1]
+	fmul <4 x float> %348, %31		; <<4 x float>>:349 [#uses=1]
+	fmul <4 x float> %349, %32		; <<4 x float>>:350 [#uses=1]
+	fmul <4 x float> %13, %13		; <<4 x float>>:351 [#uses=1]
+	fmul <4 x float> %351, %14		; <<4 x float>>:352 [#uses=1]
+	fmul <4 x float> %352, %15		; <<4 x float>>:353 [#uses=1]
+	fmul <4 x float> %353, %16		; <<4 x float>>:354 [#uses=1]
+	fmul <4 x float> %354, %17		; <<4 x float>>:355 [#uses=1]
+	fmul <4 x float> %355, %18		; <<4 x float>>:356 [#uses=1]
+	fmul <4 x float> %356, %19		; <<4 x float>>:357 [#uses=1]
+	fmul <4 x float> %357, %20		; <<4 x float>>:358 [#uses=1]
+	fmul <4 x float> %358, %21		; <<4 x float>>:359 [#uses=1]
+	fmul <4 x float> %359, %22		; <<4 x float>>:360 [#uses=1]
+	fmul <4 x float> %360, %23		; <<4 x float>>:361 [#uses=1]
+	fmul <4 x float> %361, %24		; <<4 x float>>:362 [#uses=1]
+	fmul <4 x float> %362, %25		; <<4 x float>>:363 [#uses=1]
+	fmul <4 x float> %363, %26		; <<4 x float>>:364 [#uses=1]
+	fmul <4 x float> %364, %27		; <<4 x float>>:365 [#uses=1]
+	fmul <4 x float> %365, %28		; <<4 x float>>:366 [#uses=1]
+	fmul <4 x float> %366, %29		; <<4 x float>>:367 [#uses=1]
+	fmul <4 x float> %367, %30		; <<4 x float>>:368 [#uses=1]
+	fmul <4 x float> %368, %31		; <<4 x float>>:369 [#uses=1]
+	fmul <4 x float> %369, %32		; <<4 x float>>:370 [#uses=1]
+	fmul <4 x float> %14, %14		; <<4 x float>>:371 [#uses=1]
+	fmul <4 x float> %371, %15		; <<4 x float>>:372 [#uses=1]
+	fmul <4 x float> %372, %16		; <<4 x float>>:373 [#uses=1]
+	fmul <4 x float> %373, %17		; <<4 x float>>:374 [#uses=1]
+	fmul <4 x float> %374, %18		; <<4 x float>>:375 [#uses=1]
+	fmul <4 x float> %375, %19		; <<4 x float>>:376 [#uses=1]
+	fmul <4 x float> %376, %20		; <<4 x float>>:377 [#uses=1]
+	fmul <4 x float> %377, %21		; <<4 x float>>:378 [#uses=1]
+	fmul <4 x float> %378, %22		; <<4 x float>>:379 [#uses=1]
+	fmul <4 x float> %379, %23		; <<4 x float>>:380 [#uses=1]
+	fmul <4 x float> %380, %24		; <<4 x float>>:381 [#uses=1]
+	fmul <4 x float> %381, %25		; <<4 x float>>:382 [#uses=1]
+	fmul <4 x float> %382, %26		; <<4 x float>>:383 [#uses=1]
+	fmul <4 x float> %383, %27		; <<4 x float>>:384 [#uses=1]
+	fmul <4 x float> %384, %28		; <<4 x float>>:385 [#uses=1]
+	fmul <4 x float> %385, %29		; <<4 x float>>:386 [#uses=1]
+	fmul <4 x float> %386, %30		; <<4 x float>>:387 [#uses=1]
+	fmul <4 x float> %387, %31		; <<4 x float>>:388 [#uses=1]
+	fmul <4 x float> %388, %32		; <<4 x float>>:389 [#uses=1]
+	fmul <4 x float> %15, %15		; <<4 x float>>:390 [#uses=1]
+	fmul <4 x float> %390, %16		; <<4 x float>>:391 [#uses=1]
+	fmul <4 x float> %391, %17		; <<4 x float>>:392 [#uses=1]
+	fmul <4 x float> %392, %18		; <<4 x float>>:393 [#uses=1]
+	fmul <4 x float> %393, %19		; <<4 x float>>:394 [#uses=1]
+	fmul <4 x float> %394, %20		; <<4 x float>>:395 [#uses=1]
+	fmul <4 x float> %395, %21		; <<4 x float>>:396 [#uses=1]
+	fmul <4 x float> %396, %22		; <<4 x float>>:397 [#uses=1]
+	fmul <4 x float> %397, %23		; <<4 x float>>:398 [#uses=1]
+	fmul <4 x float> %398, %24		; <<4 x float>>:399 [#uses=1]
+	fmul <4 x float> %399, %25		; <<4 x float>>:400 [#uses=1]
+	fmul <4 x float> %400, %26		; <<4 x float>>:401 [#uses=1]
+	fmul <4 x float> %401, %27		; <<4 x float>>:402 [#uses=1]
+	fmul <4 x float> %402, %28		; <<4 x float>>:403 [#uses=1]
+	fmul <4 x float> %403, %29		; <<4 x float>>:404 [#uses=1]
+	fmul <4 x float> %404, %30		; <<4 x float>>:405 [#uses=1]
+	fmul <4 x float> %405, %31		; <<4 x float>>:406 [#uses=1]
+	fmul <4 x float> %406, %32		; <<4 x float>>:407 [#uses=1]
+	fmul <4 x float> %16, %16		; <<4 x float>>:408 [#uses=1]
+	fmul <4 x float> %408, %17		; <<4 x float>>:409 [#uses=1]
+	fmul <4 x float> %409, %18		; <<4 x float>>:410 [#uses=1]
+	fmul <4 x float> %410, %19		; <<4 x float>>:411 [#uses=1]
+	fmul <4 x float> %411, %20		; <<4 x float>>:412 [#uses=1]
+	fmul <4 x float> %412, %21		; <<4 x float>>:413 [#uses=1]
+	fmul <4 x float> %413, %22		; <<4 x float>>:414 [#uses=1]
+	fmul <4 x float> %414, %23		; <<4 x float>>:415 [#uses=1]
+	fmul <4 x float> %415, %24		; <<4 x float>>:416 [#uses=1]
+	fmul <4 x float> %416, %25		; <<4 x float>>:417 [#uses=1]
+	fmul <4 x float> %417, %26		; <<4 x float>>:418 [#uses=1]
+	fmul <4 x float> %418, %27		; <<4 x float>>:419 [#uses=1]
+	fmul <4 x float> %419, %28		; <<4 x float>>:420 [#uses=1]
+	fmul <4 x float> %420, %29		; <<4 x float>>:421 [#uses=1]
+	fmul <4 x float> %421, %30		; <<4 x float>>:422 [#uses=1]
+	fmul <4 x float> %422, %31		; <<4 x float>>:423 [#uses=1]
+	fmul <4 x float> %423, %32		; <<4 x float>>:424 [#uses=1]
+	fmul <4 x float> %17, %17		; <<4 x float>>:425 [#uses=1]
+	fmul <4 x float> %425, %18		; <<4 x float>>:426 [#uses=1]
+	fmul <4 x float> %426, %19		; <<4 x float>>:427 [#uses=1]
+	fmul <4 x float> %427, %20		; <<4 x float>>:428 [#uses=1]
+	fmul <4 x float> %428, %21		; <<4 x float>>:429 [#uses=1]
+	fmul <4 x float> %429, %22		; <<4 x float>>:430 [#uses=1]
+	fmul <4 x float> %430, %23		; <<4 x float>>:431 [#uses=1]
+	fmul <4 x float> %431, %24		; <<4 x float>>:432 [#uses=1]
+	fmul <4 x float> %432, %25		; <<4 x float>>:433 [#uses=1]
+	fmul <4 x float> %433, %26		; <<4 x float>>:434 [#uses=1]
+	fmul <4 x float> %434, %27		; <<4 x float>>:435 [#uses=1]
+	fmul <4 x float> %435, %28		; <<4 x float>>:436 [#uses=1]
+	fmul <4 x float> %436, %29		; <<4 x float>>:437 [#uses=1]
+	fmul <4 x float> %437, %30		; <<4 x float>>:438 [#uses=1]
+	fmul <4 x float> %438, %31		; <<4 x float>>:439 [#uses=1]
+	fmul <4 x float> %439, %32		; <<4 x float>>:440 [#uses=1]
+	fmul <4 x float> %18, %18		; <<4 x float>>:441 [#uses=1]
+	fmul <4 x float> %441, %19		; <<4 x float>>:442 [#uses=1]
+	fmul <4 x float> %442, %20		; <<4 x float>>:443 [#uses=1]
+	fmul <4 x float> %443, %21		; <<4 x float>>:444 [#uses=1]
+	fmul <4 x float> %444, %22		; <<4 x float>>:445 [#uses=1]
+	fmul <4 x float> %445, %23		; <<4 x float>>:446 [#uses=1]
+	fmul <4 x float> %446, %24		; <<4 x float>>:447 [#uses=1]
+	fmul <4 x float> %447, %25		; <<4 x float>>:448 [#uses=1]
+	fmul <4 x float> %448, %26		; <<4 x float>>:449 [#uses=1]
+	fmul <4 x float> %449, %27		; <<4 x float>>:450 [#uses=1]
+	fmul <4 x float> %450, %28		; <<4 x float>>:451 [#uses=1]
+	fmul <4 x float> %451, %29		; <<4 x float>>:452 [#uses=1]
+	fmul <4 x float> %452, %30		; <<4 x float>>:453 [#uses=1]
+	fmul <4 x float> %453, %31		; <<4 x float>>:454 [#uses=1]
+	fmul <4 x float> %454, %32		; <<4 x float>>:455 [#uses=1]
+	fmul <4 x float> %19, %19		; <<4 x float>>:456 [#uses=1]
+	fmul <4 x float> %456, %20		; <<4 x float>>:457 [#uses=1]
+	fmul <4 x float> %457, %21		; <<4 x float>>:458 [#uses=1]
+	fmul <4 x float> %458, %22		; <<4 x float>>:459 [#uses=1]
+	fmul <4 x float> %459, %23		; <<4 x float>>:460 [#uses=1]
+	fmul <4 x float> %460, %24		; <<4 x float>>:461 [#uses=1]
+	fmul <4 x float> %461, %25		; <<4 x float>>:462 [#uses=1]
+	fmul <4 x float> %462, %26		; <<4 x float>>:463 [#uses=1]
+	fmul <4 x float> %463, %27		; <<4 x float>>:464 [#uses=1]
+	fmul <4 x float> %464, %28		; <<4 x float>>:465 [#uses=1]
+	fmul <4 x float> %465, %29		; <<4 x float>>:466 [#uses=1]
+	fmul <4 x float> %466, %30		; <<4 x float>>:467 [#uses=1]
+	fmul <4 x float> %467, %31		; <<4 x float>>:468 [#uses=1]
+	fmul <4 x float> %468, %32		; <<4 x float>>:469 [#uses=1]
+	fmul <4 x float> %20, %20		; <<4 x float>>:470 [#uses=1]
+	fmul <4 x float> %470, %21		; <<4 x float>>:471 [#uses=1]
+	fmul <4 x float> %471, %22		; <<4 x float>>:472 [#uses=1]
+	fmul <4 x float> %472, %23		; <<4 x float>>:473 [#uses=1]
+	fmul <4 x float> %473, %24		; <<4 x float>>:474 [#uses=1]
+	fmul <4 x float> %474, %25		; <<4 x float>>:475 [#uses=1]
+	fmul <4 x float> %475, %26		; <<4 x float>>:476 [#uses=1]
+	fmul <4 x float> %476, %27		; <<4 x float>>:477 [#uses=1]
+	fmul <4 x float> %477, %28		; <<4 x float>>:478 [#uses=1]
+	fmul <4 x float> %478, %29		; <<4 x float>>:479 [#uses=1]
+	fmul <4 x float> %479, %30		; <<4 x float>>:480 [#uses=1]
+	fmul <4 x float> %480, %31		; <<4 x float>>:481 [#uses=1]
+	fmul <4 x float> %481, %32		; <<4 x float>>:482 [#uses=1]
+	fmul <4 x float> %21, %21		; <<4 x float>>:483 [#uses=1]
+	fmul <4 x float> %483, %22		; <<4 x float>>:484 [#uses=1]
+	fmul <4 x float> %484, %23		; <<4 x float>>:485 [#uses=1]
+	fmul <4 x float> %485, %24		; <<4 x float>>:486 [#uses=1]
+	fmul <4 x float> %486, %25		; <<4 x float>>:487 [#uses=1]
+	fmul <4 x float> %487, %26		; <<4 x float>>:488 [#uses=1]
+	fmul <4 x float> %488, %27		; <<4 x float>>:489 [#uses=1]
+	fmul <4 x float> %489, %28		; <<4 x float>>:490 [#uses=1]
+	fmul <4 x float> %490, %29		; <<4 x float>>:491 [#uses=1]
+	fmul <4 x float> %491, %30		; <<4 x float>>:492 [#uses=1]
+	fmul <4 x float> %492, %31		; <<4 x float>>:493 [#uses=1]
+	fmul <4 x float> %493, %32		; <<4 x float>>:494 [#uses=1]
+	fmul <4 x float> %22, %22		; <<4 x float>>:495 [#uses=1]
+	fmul <4 x float> %495, %23		; <<4 x float>>:496 [#uses=1]
+	fmul <4 x float> %496, %24		; <<4 x float>>:497 [#uses=1]
+	fmul <4 x float> %497, %25		; <<4 x float>>:498 [#uses=1]
+	fmul <4 x float> %498, %26		; <<4 x float>>:499 [#uses=1]
+	fmul <4 x float> %499, %27		; <<4 x float>>:500 [#uses=1]
+	fmul <4 x float> %500, %28		; <<4 x float>>:501 [#uses=1]
+	fmul <4 x float> %501, %29		; <<4 x float>>:502 [#uses=1]
+	fmul <4 x float> %502, %30		; <<4 x float>>:503 [#uses=1]
+	fmul <4 x float> %503, %31		; <<4 x float>>:504 [#uses=1]
+	fmul <4 x float> %504, %32		; <<4 x float>>:505 [#uses=1]
+	fmul <4 x float> %23, %23		; <<4 x float>>:506 [#uses=1]
+	fmul <4 x float> %506, %24		; <<4 x float>>:507 [#uses=1]
+	fmul <4 x float> %507, %25		; <<4 x float>>:508 [#uses=1]
+	fmul <4 x float> %508, %26		; <<4 x float>>:509 [#uses=1]
+	fmul <4 x float> %509, %27		; <<4 x float>>:510 [#uses=1]
+	fmul <4 x float> %510, %28		; <<4 x float>>:511 [#uses=1]
+	fmul <4 x float> %511, %29		; <<4 x float>>:512 [#uses=1]
+	fmul <4 x float> %512, %30		; <<4 x float>>:513 [#uses=1]
+	fmul <4 x float> %513, %31		; <<4 x float>>:514 [#uses=1]
+	fmul <4 x float> %514, %32		; <<4 x float>>:515 [#uses=1]
+	fmul <4 x float> %24, %24		; <<4 x float>>:516 [#uses=1]
+	fmul <4 x float> %516, %25		; <<4 x float>>:517 [#uses=1]
+	fmul <4 x float> %517, %26		; <<4 x float>>:518 [#uses=1]
+	fmul <4 x float> %518, %27		; <<4 x float>>:519 [#uses=1]
+	fmul <4 x float> %519, %28		; <<4 x float>>:520 [#uses=1]
+	fmul <4 x float> %520, %29		; <<4 x float>>:521 [#uses=1]
+	fmul <4 x float> %521, %30		; <<4 x float>>:522 [#uses=1]
+	fmul <4 x float> %522, %31		; <<4 x float>>:523 [#uses=1]
+	fmul <4 x float> %523, %32		; <<4 x float>>:524 [#uses=1]
+	fmul <4 x float> %25, %25		; <<4 x float>>:525 [#uses=1]
+	fmul <4 x float> %525, %26		; <<4 x float>>:526 [#uses=1]
+	fmul <4 x float> %526, %27		; <<4 x float>>:527 [#uses=1]
+	fmul <4 x float> %527, %28		; <<4 x float>>:528 [#uses=1]
+	fmul <4 x float> %528, %29		; <<4 x float>>:529 [#uses=1]
+	fmul <4 x float> %529, %30		; <<4 x float>>:530 [#uses=1]
+	fmul <4 x float> %530, %31		; <<4 x float>>:531 [#uses=1]
+	fmul <4 x float> %531, %32		; <<4 x float>>:532 [#uses=1]
+	fmul <4 x float> %26, %26		; <<4 x float>>:533 [#uses=1]
+	fmul <4 x float> %533, %27		; <<4 x float>>:534 [#uses=1]
+	fmul <4 x float> %534, %28		; <<4 x float>>:535 [#uses=1]
+	fmul <4 x float> %535, %29		; <<4 x float>>:536 [#uses=1]
+	fmul <4 x float> %536, %30		; <<4 x float>>:537 [#uses=1]
+	fmul <4 x float> %537, %31		; <<4 x float>>:538 [#uses=1]
+	fmul <4 x float> %538, %32		; <<4 x float>>:539 [#uses=1]
+	fmul <4 x float> %27, %27		; <<4 x float>>:540 [#uses=1]
+	fmul <4 x float> %540, %28		; <<4 x float>>:541 [#uses=1]
+	fmul <4 x float> %541, %29		; <<4 x float>>:542 [#uses=1]
+	fmul <4 x float> %542, %30		; <<4 x float>>:543 [#uses=1]
+	fmul <4 x float> %543, %31		; <<4 x float>>:544 [#uses=1]
+	fmul <4 x float> %544, %32		; <<4 x float>>:545 [#uses=1]
+	fmul <4 x float> %28, %28		; <<4 x float>>:546 [#uses=1]
+	fmul <4 x float> %546, %29		; <<4 x float>>:547 [#uses=1]
+	fmul <4 x float> %547, %30		; <<4 x float>>:548 [#uses=1]
+	fmul <4 x float> %548, %31		; <<4 x float>>:549 [#uses=1]
+	fmul <4 x float> %549, %32		; <<4 x float>>:550 [#uses=1]
+	fmul <4 x float> %29, %29		; <<4 x float>>:551 [#uses=1]
+	fmul <4 x float> %551, %30		; <<4 x float>>:552 [#uses=1]
+	fmul <4 x float> %552, %31		; <<4 x float>>:553 [#uses=1]
+	fmul <4 x float> %553, %32		; <<4 x float>>:554 [#uses=1]
+	fmul <4 x float> %30, %30		; <<4 x float>>:555 [#uses=1]
+	fmul <4 x float> %555, %31		; <<4 x float>>:556 [#uses=1]
+	fmul <4 x float> %556, %32		; <<4 x float>>:557 [#uses=1]
+	fmul <4 x float> %31, %31		; <<4 x float>>:558 [#uses=1]
+	fmul <4 x float> %558, %32		; <<4 x float>>:559 [#uses=1]
+	fmul <4 x float> %32, %32		; <<4 x float>>:560 [#uses=1]
+	fadd <4 x float> %64, %64		; <<4 x float>>:561 [#uses=1]
+	fadd <4 x float> %561, %64		; <<4 x float>>:562 [#uses=1]
+	fadd <4 x float> %562, %95		; <<4 x float>>:563 [#uses=1]
+	fadd <4 x float> %563, %125		; <<4 x float>>:564 [#uses=1]
+	fadd <4 x float> %564, %154		; <<4 x float>>:565 [#uses=1]
+	fadd <4 x float> %565, %182		; <<4 x float>>:566 [#uses=1]
+	fadd <4 x float> %566, %209		; <<4 x float>>:567 [#uses=1]
+	fadd <4 x float> %567, %235		; <<4 x float>>:568 [#uses=1]
+	fadd <4 x float> %568, %260		; <<4 x float>>:569 [#uses=1]
+	fadd <4 x float> %569, %284		; <<4 x float>>:570 [#uses=1]
+	fadd <4 x float> %570, %307		; <<4 x float>>:571 [#uses=1]
+	fadd <4 x float> %571, %329		; <<4 x float>>:572 [#uses=1]
+	fadd <4 x float> %572, %350		; <<4 x float>>:573 [#uses=1]
+	fadd <4 x float> %573, %370		; <<4 x float>>:574 [#uses=1]
+	fadd <4 x float> %574, %389		; <<4 x float>>:575 [#uses=1]
+	fadd <4 x float> %575, %407		; <<4 x float>>:576 [#uses=1]
+	fadd <4 x float> %576, %424		; <<4 x float>>:577 [#uses=1]
+	fadd <4 x float> %577, %440		; <<4 x float>>:578 [#uses=1]
+	fadd <4 x float> %578, %455		; <<4 x float>>:579 [#uses=1]
+	fadd <4 x float> %579, %469		; <<4 x float>>:580 [#uses=1]
+	fadd <4 x float> %580, %482		; <<4 x float>>:581 [#uses=1]
+	fadd <4 x float> %581, %494		; <<4 x float>>:582 [#uses=1]
+	fadd <4 x float> %582, %505		; <<4 x float>>:583 [#uses=1]
+	fadd <4 x float> %583, %515		; <<4 x float>>:584 [#uses=1]
+	fadd <4 x float> %584, %524		; <<4 x float>>:585 [#uses=1]
+	fadd <4 x float> %585, %532		; <<4 x float>>:586 [#uses=1]
+	fadd <4 x float> %586, %539		; <<4 x float>>:587 [#uses=1]
+	fadd <4 x float> %587, %545		; <<4 x float>>:588 [#uses=1]
+	fadd <4 x float> %588, %550		; <<4 x float>>:589 [#uses=1]
+	fadd <4 x float> %589, %554		; <<4 x float>>:590 [#uses=1]
+	fadd <4 x float> %590, %557		; <<4 x float>>:591 [#uses=1]
+	fadd <4 x float> %591, %559		; <<4 x float>>:592 [#uses=1]
+	fadd <4 x float> %592, %560		; <<4 x float>>:593 [#uses=1]
 	store <4 x float> %593, <4 x float>* @0, align 1
 	ret void
 }
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
index 735c610bc7a9..da6c089c460f 100644
--- a/test/CodeGen/X86/2008-07-23-VSetCC.ll
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -13,12 +13,12 @@ bb.nph:		; preds = %bb.nph, %0
 	insertelement <4 x i32> zeroinitializer, i32 %5, i32 3		; <<4 x i32>>:6 [#uses=1]
 	and <4 x i32> zeroinitializer, %6		; <<4 x i32>>:7 [#uses=1]
 	bitcast <4 x i32> %7 to <4 x float>		; <<4 x float>>:8 [#uses=1]
-	mul <4 x float> zeroinitializer, %8		; <<4 x float>>:9 [#uses=1]
+	fmul <4 x float> zeroinitializer, %8		; <<4 x float>>:9 [#uses=1]
 	bitcast <4 x float> %9 to <4 x i32>		; <<4 x i32>>:10 [#uses=1]
 	or <4 x i32> %10, zeroinitializer		; <<4 x i32>>:11 [#uses=1]
 	bitcast <4 x i32> %11 to <4 x float>		; <<4 x float>>:12 [#uses=1]
-	mul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:13 [#uses=1]
-	sub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:14 [#uses=1]
+	fmul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:13 [#uses=1]
+	fsub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:14 [#uses=1]
 	extractelement <4 x float> %14, i32 3		; <float>:15 [#uses=1]
 	call float @fmaxf( float 0.000000e+00, float %15 )		; <float>:16 [#uses=0]
 	br label %bb.nph
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
index b50f2b0a6087..4e3533287dbc 100644
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -13,7 +13,7 @@ bb151:		; preds = %entry
 
 bb163:		; preds = %bb151, %entry
 	%tmp366 = load double* null, align 8		; <double> [#uses=1]
-	%tmp368 = mul double %tmp366, 0.000000e+00		; <double> [#uses=1]
+	%tmp368 = fmul double %tmp366, 0.000000e+00		; <double> [#uses=1]
 	%tmp368226 = bitcast double %tmp368 to i64		; <i64> [#uses=1]
 	br label %bb5.i
 
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 2c8e12fb26ff..ad13b8528372 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -26,7 +26,7 @@ bb22.preheader:		; preds = %bb24.preheader, %bb22.preheader
 	br label %bb22.preheader
 
 bb25:		; preds = %bb24.preheader
-	%7 = mul double 0.000000e+00, %6		; <double> [#uses=0]
+	%7 = fmul double 0.000000e+00, %6		; <double> [#uses=0]
 	%8 = add i32 %i3.122100, 0		; <i32> [#uses=1]
 	%9 = icmp sgt i32 %8, 0		; <i1> [#uses=1]
 	br i1 %9, label %bb3, label %bb24.preheader
@@ -37,7 +37,7 @@ bb24.preheader:		; preds = %bb25, %bb18
 	br i1 %10, label %bb25, label %bb22.preheader
 
 bb30.loopexit:		; preds = %bb
-	%11 = mul double 0.000000e+00, 0x401921FB54442D1C		; <double> [#uses=1]
+	%11 = fmul double 0.000000e+00, 0x401921FB54442D1C		; <double> [#uses=1]
 	br label %bb3
 }
 
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index bb9fbdba4aec..36a054a3e6f8 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -12,6 +12,6 @@ define x86_fp80 @test(...) nounwind {
 	call void @llvm.va_start(i8* %v1)
 	%t1 = va_arg i8** %ap, x86_fp80		; <x86_fp80> [#uses=1]
 	%t2 = va_arg i8** %ap, x86_fp80		; <x86_fp80> [#uses=1]
-	%t = add x86_fp80 %t1, %t2		; <x86_fp80> [#uses=1]
+	%t = fadd x86_fp80 %t1, %t2		; <x86_fp80> [#uses=1]
 	ret x86_fp80 %t
 }
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
index dbb7acf17f00..b6b5cbda4bd1 100644
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -145,7 +145,7 @@ bb4426.i.i.i:		; preds = %bb7551.i.i.i
 	%20 = add <4 x i32> %19, zeroinitializer		; <<4 x i32>> [#uses=3]
 	%21 = load i32* null, align 4		; <i32> [#uses=0]
 	%22 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind readnone		; <<4 x float>> [#uses=1]
-	%23 = mul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%23 = fmul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
 	%tmp2114.i119.i.i = extractelement <4 x i32> %20, i32 1		; <i32> [#uses=1]
 	%24 = shl i32 %tmp2114.i119.i.i, 2		; <i32> [#uses=1]
 	%25 = getelementptr i8* %11, i32 %24		; <i8*> [#uses=1]
@@ -160,7 +160,7 @@ bb4426.i.i.i:		; preds = %bb7551.i.i.i
 	%33 = bitcast <8 x i16> %32 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%34 = shufflevector <4 x i32> %33, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
 	%35 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %34) nounwind readnone		; <<4 x float>> [#uses=1]
-	%36 = mul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%36 = fmul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
 	%tmp2113.i124.i.i = extractelement <4 x i32> %20, i32 2		; <i32> [#uses=1]
 	%37 = shl i32 %tmp2113.i124.i.i, 2		; <i32> [#uses=1]
 	%38 = getelementptr i8* %14, i32 %37		; <i8*> [#uses=1]
@@ -175,7 +175,7 @@ bb4426.i.i.i:		; preds = %bb7551.i.i.i
 	%46 = bitcast <8 x i16> %45 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%47 = shufflevector <4 x i32> %46, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
 	%48 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %47) nounwind readnone		; <<4 x float>> [#uses=1]
-	%49 = mul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%49 = fmul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
 	%tmp2112.i129.i.i = extractelement <4 x i32> %20, i32 3		; <i32> [#uses=1]
 	%50 = shl i32 %tmp2112.i129.i.i, 2		; <i32> [#uses=1]
 	%51 = getelementptr i8* %17, i32 %50		; <i8*> [#uses=1]
@@ -190,15 +190,15 @@ bb4426.i.i.i:		; preds = %bb7551.i.i.i
 	%59 = bitcast <8 x i16> %58 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%60 = shufflevector <4 x i32> %59, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
 	%61 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %60) nounwind readnone		; <<4 x float>> [#uses=1]
-	%62 = mul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
-	%63 = mul <4 x float> %23, zeroinitializer		; <<4 x float>> [#uses=1]
-	%64 = add <4 x float> zeroinitializer, %63		; <<4 x float>> [#uses=1]
-	%65 = mul <4 x float> %36, zeroinitializer		; <<4 x float>> [#uses=1]
-	%66 = add <4 x float> zeroinitializer, %65		; <<4 x float>> [#uses=1]
-	%67 = mul <4 x float> %49, zeroinitializer		; <<4 x float>> [#uses=1]
-	%68 = add <4 x float> zeroinitializer, %67		; <<4 x float>> [#uses=1]
-	%69 = mul <4 x float> %62, zeroinitializer		; <<4 x float>> [#uses=1]
-	%70 = add <4 x float> zeroinitializer, %69		; <<4 x float>> [#uses=1]
+	%62 = fmul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%63 = fmul <4 x float> %23, zeroinitializer		; <<4 x float>> [#uses=1]
+	%64 = fadd <4 x float> zeroinitializer, %63		; <<4 x float>> [#uses=1]
+	%65 = fmul <4 x float> %36, zeroinitializer		; <<4 x float>> [#uses=1]
+	%66 = fadd <4 x float> zeroinitializer, %65		; <<4 x float>> [#uses=1]
+	%67 = fmul <4 x float> %49, zeroinitializer		; <<4 x float>> [#uses=1]
+	%68 = fadd <4 x float> zeroinitializer, %67		; <<4 x float>> [#uses=1]
+	%69 = fmul <4 x float> %62, zeroinitializer		; <<4 x float>> [#uses=1]
+	%70 = fadd <4 x float> zeroinitializer, %69		; <<4 x float>> [#uses=1]
 	%tmp7452.i.i.i = bitcast <4 x float> %64 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp7454.i.i.i = and <4 x i32> %tmp7452.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%tmp7459.i.i.i = or <4 x i32> %tmp7454.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
index 6de11c94794a..340608af35a8 100644
--- a/test/CodeGen/X86/2009-01-16-UIntToFP.ll
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -22,10 +22,10 @@ bb2:		; preds = %bb1, %bb, %entry
 	%5 = lshr i64 %u_addr.0, 32		; <i64> [#uses=1]
 	%6 = trunc i64 %5 to i32		; <i32> [#uses=1]
 	%7 = uitofp i32 %6 to double		; <double> [#uses=1]
-	%8 = mul double %7, 0x41F0000000000000		; <double> [#uses=1]
+	%8 = fmul double %7, 0x41F0000000000000		; <double> [#uses=1]
 	%9 = trunc i64 %u_addr.0 to i32		; <i32> [#uses=1]
 	%10 = uitofp i32 %9 to double		; <double> [#uses=1]
-	%11 = add double %10, %8		; <double> [#uses=1]
+	%11 = fadd double %10, %8		; <double> [#uses=1]
 	%12 = fptrunc double %11 to float		; <float> [#uses=1]
 	ret float %12
 }
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
index 747dc8ae0a64..1d10319e86d9 100644
--- a/test/CodeGen/X86/2009-02-12-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -3,9 +3,9 @@
 
 define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
 entry:
-	%0 = mul x86_fp80 %b, %d		; <x86_fp80> [#uses=1]
-	%1 = sub x86_fp80 0xK00000000000000000000, %0		; <x86_fp80> [#uses=1]
-	%2 = add x86_fp80 0xK00000000000000000000, 0xK00000000000000000000		; <x86_fp80> [#uses=1]
+	%0 = fmul x86_fp80 %b, %d		; <x86_fp80> [#uses=1]
+	%1 = fsub x86_fp80 0xK00000000000000000000, %0		; <x86_fp80> [#uses=1]
+	%2 = fadd x86_fp80 0xK00000000000000000000, 0xK00000000000000000000		; <x86_fp80> [#uses=1]
 	%3 = fcmp uno x86_fp80 %1, 0xK00000000000000000000		; <i1> [#uses=1]
 	%4 = fcmp uno x86_fp80 %2, 0xK00000000000000000000		; <i1> [#uses=1]
 	%or.cond = and i1 %3, %4		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index b772bf8ec316..3dbfa80e00e6 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -7,7 +7,7 @@ entry:
 	%tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458>		; <<2 x i64>> [#uses=1]
 	%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double>		; <<2 x double>> [#uses=1]
 	%0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone		; <<2 x double>> [#uses=1]
-	%tmp.i = add <2 x double> %0, %C		; <<2 x double>> [#uses=1]
+	%tmp.i = fadd <2 x double> %0, %C		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp.i
 }
 
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
index 1eefaa91aafe..6f16ced1c674 100644
--- a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -6,7 +6,7 @@ define i32 @x(i32 %y) nounwind readnone {
 entry:
 	%tmp14 = zext i32 %y to i80		; <i80> [#uses=1]
 	%tmp15 = bitcast i80 %tmp14 to x86_fp80		; <x86_fp80> [#uses=1]
-	%add = add x86_fp80 %tmp15, 0xK3FFF8000000000000000		; <x86_fp80> [#uses=1]
+	%add = fadd x86_fp80 %tmp15, 0xK3FFF8000000000000000		; <x86_fp80> [#uses=1]
 	%tmp11 = bitcast x86_fp80 %add to i80		; <i80> [#uses=1]
 	%tmp10 = trunc i80 %tmp11 to i32		; <i32> [#uses=1]
 	ret i32 %tmp10
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
index 14bdcc302775..2ccd7714233e 100644
--- a/test/CodeGen/X86/2009-03-09-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -5,7 +5,7 @@ define void @__mulxc3(x86_fp80 %b) nounwind {
 entry:
 	%call = call x86_fp80 @y(x86_fp80* null, x86_fp80* null)		; <x86_fp80> [#uses=0]
 	%cmp = fcmp ord x86_fp80 %b, 0xK00000000000000000000		; <i1> [#uses=1]
-	%sub = sub x86_fp80 %b, %b		; <x86_fp80> [#uses=1]
+	%sub = fsub x86_fp80 %b, %b		; <x86_fp80> [#uses=1]
 	%cmp7 = fcmp uno x86_fp80 %sub, 0xK00000000000000000000		; <i1> [#uses=1]
 	%and12 = and i1 %cmp7, %cmp		; <i1> [#uses=1]
 	%and = zext i1 %and12 to i32		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index 75af992d1d85..ec060e4ef4a5 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -19,18 +19,18 @@ bb1:		; preds = %newFuncRoot
 	%0 = tail call double @llvm.sqrt.f64(double %.reload8)		; <double> [#uses=1]
 	%1 = fptrunc x86_fp80 %.reload6 to double		; <double> [#uses=1]
 	%2 = tail call double @fabs(double %1) nounwind readnone		; <double> [#uses=1]
-	%3 = add double %0, %2		; <double> [#uses=1]
+	%3 = fadd double %0, %2		; <double> [#uses=1]
 	%4 = tail call double @llvm.pow.f64(double %3, double 0x3FD5555555555555)		; <double> [#uses=1]
 	%5 = fpext double %4 to x86_fp80		; <x86_fp80> [#uses=2]
 	%6 = fdiv x86_fp80 %.reload5, %5		; <x86_fp80> [#uses=1]
-	%7 = add x86_fp80 %5, %6		; <x86_fp80> [#uses=1]
+	%7 = fadd x86_fp80 %5, %6		; <x86_fp80> [#uses=1]
 	%8 = fptrunc x86_fp80 %7 to double		; <double> [#uses=1]
 	%9 = fcmp olt x86_fp80 %.reload6, 0xK00000000000000000000		; <i1> [#uses=1]
 	%iftmp.6.0 = select i1 %9, double 1.000000e+00, double -1.000000e+00		; <double> [#uses=1]
-	%10 = mul double %8, %iftmp.6.0		; <double> [#uses=1]
+	%10 = fmul double %8, %iftmp.6.0		; <double> [#uses=1]
 	%11 = fpext double %10 to x86_fp80		; <x86_fp80> [#uses=1]
 	%12 = fdiv x86_fp80 %.reload, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
-	%13 = add x86_fp80 %11, %12		; <x86_fp80> [#uses=1]
+	%13 = fadd x86_fp80 %11, %12		; <x86_fp80> [#uses=1]
 	%14 = fptrunc x86_fp80 %13 to double		; <double> [#uses=1]
 	store double %14, double* %x, align 1
 	br label %bb1.ret.exitStub
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index a96314563c15..b30d41eb05ba 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static -stats -info-output-file - > %t
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
 ; RUN: not grep spill %t
 ; RUN: not grep {%rsp} %t
 ; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
new file mode 100644
index 000000000000..c628b8affdd9
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+target triple = "x86_64-mingw64"
+
+define x86_fp80 @a(i64 %x) nounwind readnone {
+entry:
+	%conv = sitofp i64 %x to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %conv
+}
+
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
new file mode 100644
index 000000000000..33d797297be8
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -o %t1 -f
+; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
+; RUN: grep "movaps	\\\%xmm8, \\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps	\\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+target triple = "x86_64-mingw64"
+
+define i32 @a() nounwind {
+entry:
+	tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
+	ret i32 undef
+}
+
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
new file mode 100644
index 000000000000..fa90fa9426d6
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as < %s | llc -march=x86
+
+	type { %struct.GAP }		; type %0
+	type { i16, i8, i8 }		; type %1
+	type { [2 x i32], [2 x i32] }		; type %2
+	type { %struct.rec* }		; type %3
+	%struct.FILE_POS = type { i8, i8, i16, i32 }
+	%struct.FIRST_UNION = type { %struct.FILE_POS }
+	%struct.FOURTH_UNION = type { %struct.STYLE }
+	%struct.GAP = type { i8, i8, i16 }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { %1 }
+	%struct.STYLE = type { %0, %0, i16, i16, i32 }
+	%struct.THIRD_UNION = type { %2 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.rec = type { %struct.head_type }
+
+define fastcc void @MinSize(%struct.rec* %x) nounwind {
+entry:
+	%tmp13 = load i8* undef, align 4		; <i8> [#uses=3]
+	%tmp14 = zext i8 %tmp13 to i32		; <i32> [#uses=2]
+	switch i32 %tmp14, label %bb1109 [
+		i32 42, label %bb246
+	]
+
+bb246:		; preds = %entry, %entry
+	switch i8 %tmp13, label %bb249 [
+		i8 42, label %bb269
+		i8 44, label %bb269
+	]
+
+bb249:		; preds = %bb246
+	%tmp3240 = icmp eq i8 %tmp13, 0		; <i1> [#uses=1]
+	br i1 %tmp3240, label %bb974, label %bb269
+
+bb269:
+	%tmp3424 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1		; <%struct.rec**> [#uses=0]
+	unreachable
+
+bb974:
+	unreachable
+
+bb1109:		; preds = %entry
+	call fastcc void @Image(i32 %tmp14) nounwind		; <i8*> [#uses=0]
+	unreachable
+}
+
+declare fastcc void @Image(i32) nounwind
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
new file mode 100644
index 000000000000..94df530ec0e6
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movl
+
+define <8 x i8> @a(i8 zeroext %x) nounwind {
+  %r = insertelement <8 x i8> undef, i8 %x, i32 0
+  ret <8 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
new file mode 100644
index 000000000000..220423aa986a
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: grep movzwl %t1 | count 2
+; RUN: grep movzbl %t1 | count 2
+; RUN: grep movd %t1 | count 4
+
+define <4 x i16> @a(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
+  ret <4 x i16> %r
+}
+
+define <8 x i16> @b(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
+  ret <8 x i16> %r
+}
+
+define <8 x i8> @c(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
+  ret <8 x i8> %r
+}
+
+define <16 x i8> @d(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
+  ret <16 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
new file mode 100644
index 000000000000..2e3f195ff947
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc
+
+define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
+entry:
+	%conv = bitcast <2 x i64> %a to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%conv2 = trunc i32 %b to i16		; <i16> [#uses=1]
+	%and = and i32 %imm, 7		; <i32> [#uses=1]
+	%vecins = insertelement <8 x i16> %conv, i16 %conv2, i32 %and		; <<8 x i16>> [#uses=1]
+	%conv6 = bitcast <8 x i16> %vecins to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %conv6
+}
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
new file mode 100644
index 000000000000..589a8800ede7
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; PR2598
+
+define <2 x float> @a(<2 x i32> %i) nounwind {
+  %r = sitofp <2 x i32> %i to <2 x float> 
+  ret <2 x float> %r
+}
+
+define <2 x i32> @b(<2 x float> %i) nounwind {
+  %r = fptosi <2 x float> %i to <2 x i32> 
+  ret <2 x i32> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
new file mode 100644
index 000000000000..a46fd1a2e76f
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc
+
+define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
+entry:
+  %0 = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
+	ret <2 x i64> %0
+}
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index f1fec3f8b94d..513599c58bcd 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -141,26 +141,6 @@
 ; RUN: not grep @PLTOFF %t
 ; RUN: grep {call	\\\*} %t | count 10
 ; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 91
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 70
-; RUN: grep movq %t | count 56
-; RUN: grep addq %t | count 20
-; RUN: grep subq %t | count 14
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: grep {%rip} %t | count 139
 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small > %t
 ; RUN: not grep leal %t
 ; RUN: grep movl %t | count 95
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index b432c39c0113..b9ce10f44198 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -8,18 +8,18 @@
 define void @goo(double* %r, double* %p, double* %q) nounwind {
 entry:
 	%0 = load double* %p, align 8
-	%1 = add double %0, 1.100000e+00
-	%2 = mul double %1, 1.200000e+00
-	%3 = add double %2, 1.300000e+00
-	%4 = mul double %3, 1.400000e+00
-	%5 = add double %4, 1.500000e+00
+	%1 = fadd double %0, 1.100000e+00
+	%2 = fmul double %1, 1.200000e+00
+	%3 = fadd double %2, 1.300000e+00
+	%4 = fmul double %3, 1.400000e+00
+	%5 = fadd double %4, 1.500000e+00
 	%6 = fptosi double %5 to i32
 	%7 = load double* %r, align 8
-	%8 = add double %7, 7.100000e+00
-	%9 = mul double %8, 7.200000e+00
-	%10 = add double %9, 7.300000e+00
-	%11 = mul double %10, 7.400000e+00
-	%12 = add double %11, 7.500000e+00
+	%8 = fadd double %7, 7.100000e+00
+	%9 = fmul double %8, 7.200000e+00
+	%10 = fadd double %9, 7.300000e+00
+	%11 = fmul double %10, 7.400000e+00
+	%12 = fadd double %11, 7.500000e+00
 	%13 = fptosi double %12 to i32
 	%14 = icmp slt i32 %6, %13
 	br i1 %14, label %bb, label %return
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 0fae2a659466..99394240c7c8 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -15,7 +15,7 @@ bb:		; preds = %bb, %entry
 	%tmp2 = getelementptr i32* %source, i32 %neuron.0		; <i32*> [#uses=1]
 	%tmp3 = load i32* %tmp2, align 4		; <i32> [#uses=1]
 	%tmp34 = sitofp i32 %tmp3 to float		; <float> [#uses=1]
-	%tmp6 = add float %tmp34, %thesum.0		; <float> [#uses=2]
+	%tmp6 = fadd float %tmp34, %thesum.0		; <float> [#uses=2]
 	%indvar.next = add i32 %neuron.0, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %tmp10		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb13, label %bb
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index ce4abf1d31ab..c67e0f582496 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -28,7 +28,7 @@ define <4 x float> @test3(<4 x float> %V) {
 entry:
         %tmp8 = shufflevector <4 x float> %V, <4 x float> undef,
                                         <4 x i32> < i32 3, i32 2, i32 1, i32 0 >
-        %add = add <4 x float> %tmp8, %V
+        %add = fadd <4 x float> %tmp8, %V
         ret <4 x float> %add
 }
 
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 7299aca3e0b3..9628f93e7916 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -18,8 +18,8 @@ bb:		; preds = %bb, %bb.preheader
 	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
 	%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
 	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
-	%tmp11 = mul float %tmp9, %tmp45		; <float> [#uses=1]
-	%tmp14 = add float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
+	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
+	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
 	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %umax		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb23, label %bb
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 29eb6ee11e95..05adb50b294f 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -4,7 +4,7 @@ define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80,
 entry:
 	%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
 	%z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
-	%0 = sub x86_fp80 0xK80000000000000000000, %z9
+	%0 = fsub x86_fp80 0xK80000000000000000000, %z9
 	%insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
 	%insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
 	call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 40caaa6b936a..80be8545d59c 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -6,8 +6,8 @@
 declare float @qux(float %y)
 
 define float @array(float %a) nounwind {
-  %n = mul float %a, 9.0
+  %n = fmul float %a, 9.0
   %m = call float @qux(float %n)
-  %o = mul float %m, 9.0
+  %o = fmul float %m, 9.0
   ret float %o
 }
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index c89a296d0db9..b96fdfc03c68 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
 ; RUN: grep unpcklpd %t | count 1
 ; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
 
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
 
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
 entry:
         %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
         store <2 x double> %tmp7.i, <2 x double>* %dst
         ret void
 }
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+        %tmp1 = load <4 x i16>* %src
+        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+        store <4 x i32> %0, <4 x i32>* %dest
+        ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
index 9172dced055f..842ec24e0ec8 100644
--- a/test/CodeGen/X86/extract-combine.ll
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -7,9 +7,9 @@ entry:
 	%tmp518 = shufflevector <16 x float> %tmp74.i25762, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>		; <<4 x float>> [#uses=1]
 	%movss.i25611 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp518, <4 x i32> <i32 4, i32 1, i32 2, i32 3>		; <<4 x float>> [#uses=1]
 	%conv3.i25615 = shufflevector <4 x float> %movss.i25611, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>		; <<4 x float>> [#uses=1]
-	%sub.i25620 = sub <4 x float> %conv3.i25615, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul.i25621 = mul <4 x float> zeroinitializer, %sub.i25620		; <<4 x float>> [#uses=1]
-	%add.i25622 = add <4 x float> zeroinitializer, %mul.i25621		; <<4 x float>> [#uses=1]
+	%sub.i25620 = fsub <4 x float> %conv3.i25615, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul.i25621 = fmul <4 x float> zeroinitializer, %sub.i25620		; <<4 x float>> [#uses=1]
+	%add.i25622 = fadd <4 x float> zeroinitializer, %mul.i25621		; <<4 x float>> [#uses=1]
 	store <4 x float> %add.i25622, <4 x float>* null
 	unreachable
 }
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 0646a7963ad3..7ac8e048edbc 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -16,7 +16,7 @@ define float @test1(float %X) {
 
 define double @test2(double %X) {
         %Y = fcmp oge double %X, -0.0
-        %Z = sub double -0.0, %X
+        %Z = fsub double -0.0, %X
         %Q = select i1 %Y, double %X, double %Z
         ret double %Q
 }
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 2ee2c835b1a4..a9a016b7d0f3 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -32,10 +32,10 @@ entry:
   br label %fast
 
 fast:
-  %t0 = add double %r, %s
-  %t1 = mul double %t0, %s
-  %t2 = sub double %t1, %s
-  %t3 = add double %t2, 707.0
+  %t0 = fadd double %r, %s
+  %t1 = fmul double %t0, %s
+  %t2 = fsub double %t1, %s
+  %t3 = fadd double %t2, 707.0
   br label %exit
 
 exit:
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
new file mode 100644
index 000000000000..8f705a4d242f
--- /dev/null
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llvm-as < %s | llc -march=x86-64 | grep mulps
+
+define void @test14(<4 x float>*) nounwind {
+        load <4 x float>* %0, align 1
+        mul <4 x float> %2, zeroinitializer
+        store <4 x float> %3, <4 x float>* %0, align 1
+        ret void
+}
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 066d38eb8c43..f558aca42005 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -26,23 +26,23 @@ forcond:		; preds = %entry
 
 forbody:		; preds = %forcond
 	%bitcast204.i313 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul233 = mul <4 x float> %bitcast204.i313, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul257 = mul <4 x float> %mul233, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul275 = mul <4 x float> %mul257, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul233 = fmul <4 x float> %bitcast204.i313, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul257 = fmul <4 x float> %mul233, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul275 = fmul <4 x float> %mul257, zeroinitializer		; <<4 x float>> [#uses=1]
 	%tmp51 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %mul275, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast198.i182 = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
 	%bitcast204.i185 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp69 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> zeroinitializer) nounwind		; <<4 x i32>> [#uses=1]
 	%tmp70 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp69) nounwind		; <<4 x float>> [#uses=1]
-	%sub140.i78 = sub <4 x float> zeroinitializer, %tmp70		; <<4 x float>> [#uses=2]
-	%mul166.i86 = mul <4 x float> zeroinitializer, %sub140.i78		; <<4 x float>> [#uses=1]
-	%add167.i87 = add <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
-	%mul171.i88 = mul <4 x float> %add167.i87, %sub140.i78		; <<4 x float>> [#uses=1]
-	%add172.i89 = add <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%sub140.i78 = fsub <4 x float> zeroinitializer, %tmp70		; <<4 x float>> [#uses=2]
+	%mul166.i86 = fmul <4 x float> zeroinitializer, %sub140.i78		; <<4 x float>> [#uses=1]
+	%add167.i87 = fadd <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
+	%mul171.i88 = fmul <4 x float> %add167.i87, %sub140.i78		; <<4 x float>> [#uses=1]
+	%add172.i89 = fadd <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i90 = bitcast <4 x float> %add172.i89 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps178.i92 = and <4 x i32> %bitcast176.i90, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast179.i93 = bitcast <4 x i32> %andnps178.i92 to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul186.i96 = mul <4 x float> %bitcast179.i93, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul186.i96 = fmul <4 x float> %bitcast179.i93, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast190.i98 = bitcast <4 x float> %mul186.i96 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps192.i100 = and <4 x i32> %bitcast190.i98, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%xorps.i102 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
@@ -50,15 +50,15 @@ forbody:		; preds = %forcond
 	%bitcast204.i104 = bitcast <4 x i32> %orps203.i103 to <4 x float>		; <<4 x float>> [#uses=1]
 	%cmple.i = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> %tmp51, i8 2) nounwind		; <<4 x float>> [#uses=1]
 	%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%sub140.i = sub <4 x float> zeroinitializer, %tmp80		; <<4 x float>> [#uses=1]
+	%sub140.i = fsub <4 x float> zeroinitializer, %tmp80		; <<4 x float>> [#uses=1]
 	%bitcast148.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps150.i = and <4 x i32> %bitcast148.i, < i32 -2139095041, i32 -2139095041, i32 -2139095041, i32 -2139095041 >		; <<4 x i32>> [#uses=0]
-	%mul171.i = mul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
-	%add172.i = add <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%mul171.i = fmul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
+	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul186.i = mul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast189.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
 	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
@@ -66,9 +66,9 @@ forbody:		; preds = %forcond
 	%xorps.i = xor <4 x i32> %bitcast198.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
 	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul307 = mul <4 x float> %bitcast204.i185, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul310 = mul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
-	%mul313 = mul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul307 = fmul <4 x float> %bitcast204.i185, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
+	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%tmp82 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul307, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast11.i15 = bitcast <4 x float> %tmp82 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps.i17 = and <4 x i32> %bitcast11.i15, zeroinitializer		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index de6ba6c49fff..2b75781218bc 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -28,22 +28,22 @@ forbody:		; preds = %forcond
 	%tmp78 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> < float 1.280000e+02, float 1.280000e+02, float 1.280000e+02, float 1.280000e+02 >, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=2]
 	%tmp79 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp78) nounwind		; <<4 x i32>> [#uses=1]
 	%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp79) nounwind		; <<4 x float>> [#uses=1]
-	%sub140.i = sub <4 x float> %tmp78, %tmp80		; <<4 x float>> [#uses=2]
-	%mul166.i = mul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
-	%add167.i = add <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
-	%mul171.i = mul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
-	%add172.i = add <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%sub140.i = fsub <4 x float> %tmp78, %tmp80		; <<4 x float>> [#uses=2]
+	%mul166.i = fmul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
+	%add167.i = fadd <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
+	%mul171.i = fmul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
+	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul186.i = mul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
 	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul310 = mul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
-	%mul313 = mul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
+	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
 	%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 1e3ea8918dc4..15606c34886b 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -5,7 +5,7 @@
 
 define i32 @test1(float %x) nounwind  {
 entry:
-	%tmp2 = sub float -0.000000e+00, %x		; <float> [#uses=1]
+	%tmp2 = fsub float -0.000000e+00, %x		; <float> [#uses=1]
 	%tmp210 = bitcast float %tmp2 to i32		; <i32> [#uses=1]
 	ret i32 %tmp210
 }
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 383549ac43d0..4e61d0fbe7dc 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -5,7 +5,7 @@
 define float @foo(float* %col.2.0) {
         %tmp = load float* %col.2.0             ; <float> [#uses=3]
         %tmp16 = fcmp olt float %tmp, 0.000000e+00              ; <i1> [#uses=1]
-        %tmp20 = sub float -0.000000e+00, %tmp          ; <float> [#uses=1]
+        %tmp20 = fsub float -0.000000e+00, %tmp          ; <float> [#uses=1]
         %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp         ; <float> [#uses=1]
         ret float %iftmp.2.0
 }
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index ed02c6a8f72a..f2017b961fb5 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -5,22 +5,22 @@
 
 
 define double @foo_add(double %P) {
-	%tmp.1 = add double %P, 1.230000e+02		; <double> [#uses=1]
+	%tmp.1 = fadd double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
 
 define double @foo_mul(double %P) {
-	%tmp.1 = mul double %P, 1.230000e+02		; <double> [#uses=1]
+	%tmp.1 = fmul double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
 
 define double @foo_sub(double %P) {
-	%tmp.1 = sub double %P, 1.230000e+02		; <double> [#uses=1]
+	%tmp.1 = fsub double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
 
 define double @foo_subr(double %P) {
-	%tmp.1 = sub double 1.230000e+02, %P		; <double> [#uses=1]
+	%tmp.1 = fsub double 1.230000e+02, %P		; <double> [#uses=1]
 	ret double %tmp.1
 }
 
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 7c33cb32bc6d..655ad3df3238 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -5,25 +5,25 @@
 
 define double @test_add(double %X, double* %P) {
 	%Y = load double* %P		; <double> [#uses=1]
-	%R = add double %X, %Y		; <double> [#uses=1]
+	%R = fadd double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_mul(double %X, double* %P) {
 	%Y = load double* %P		; <double> [#uses=1]
-	%R = mul double %X, %Y		; <double> [#uses=1]
+	%R = fmul double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_sub(double %X, double* %P) {
 	%Y = load double* %P		; <double> [#uses=1]
-	%R = sub double %X, %Y		; <double> [#uses=1]
+	%R = fsub double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_subr(double %X, double* %P) {
 	%Y = load double* %P		; <double> [#uses=1]
-	%R = sub double %Y, %X		; <double> [#uses=1]
+	%R = fsub double %Y, %X		; <double> [#uses=1]
 	ret double %R
 }
 
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
index 71007dcc0533..4d25fca1eb11 100644
--- a/test/CodeGen/X86/fsxor-alignment.ll
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -6,8 +6,8 @@
 ; and aren't vector-aligned.
 
 define void @foo(float* %p, float* %q, float %s, float %y) {
-  %ss = sub float -0.0, %s
-  %yy = sub float -0.0, %y
+  %ss = fsub float -0.0, %s
+  %yy = fsub float -0.0, %y
   store float %ss, float* %p
   store float %yy, float* %q
   ret void
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index ee9eaf95c807..4a85779ebf0a 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -13,7 +13,7 @@ bb:		; preds = %bb, %entry
 	%2 = load float* %1, align 4		; <float> [#uses=1]
 	%3 = getelementptr float* %B, i32 %i.03		; <float*> [#uses=1]
 	%4 = load float* %3, align 4		; <float> [#uses=1]
-	%5 = add float %2, %4		; <float> [#uses=1]
+	%5 = fadd float %2, %4		; <float> [#uses=1]
 	%6 = getelementptr float* %C, i32 %i.03		; <float*> [#uses=1]
 	store float %5, float* %6, align 4
 	%7 = add i32 %i.03, 10		; <i32> [#uses=3]
@@ -21,7 +21,7 @@ bb:		; preds = %bb, %entry
 	%9 = load float* %8, align 4		; <float> [#uses=1]
 	%10 = getelementptr float* %B, i32 %7		; <float*> [#uses=1]
 	%11 = load float* %10, align 4		; <float> [#uses=1]
-	%12 = add float %9, %11		; <float> [#uses=1]
+	%12 = fadd float %9, %11		; <float> [#uses=1]
 	%13 = getelementptr float* %C, i32 %7		; <float*> [#uses=1]
 	store float %12, float* %13, align 4
 	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index cc93b4c2eeff..aaa2f84b88c9 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -2,7 +2,7 @@
 ; RUN: not grep lea %t
 ; RUN: not grep add %t
 ; RUN: grep mov %t | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static > %t
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static > %t
 ; RUN: not grep lea %t
 ; RUN: not grep add %t
 ; RUN: grep mov %t | count 1
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 8fb6db356ced..5ed6ddb55129 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -4,11 +4,11 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps	%xmm2, %xmm0}
 
 define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
-  %y = mul <4 x double> %x, %z
+  %y = fmul <4 x double> %x, %z
   ret <4 x double> %y
 }
 
 define <8 x float> @bar(<8 x float> %x, <8 x float> %z) {
-  %y = add <8 x float> %x, %z
+  %y = fadd <8 x float> %x, %z
   ret <8 x float> %y
 }
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 91f2f2fb9330..31d94d89c376 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -21,7 +21,7 @@ define void @test4(double %X) {
 }
 
 define void @test5(double %X) {
-        %Y = add double %X, 123.0
+        %Y = fadd double %X, 123.0
         call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y)
         ret void
 }
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index f679c7f58ba1..ca39c120585a 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -21,7 +21,7 @@ define <4 x float> @test2() nounwind {
 	%mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"()
 	%a = getresult {<4 x float>, <4 x float>} %mrv, 0
 	%b = getresult {<4 x float>, <4 x float>} %mrv, 1
-	%c = add <4 x float> %a, %b
+	%c = fadd <4 x float> %a, %b
 	ret <4 x float> %c
 }
 
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
index d1bac0c3b27d..aafbbd1fd025 100644
--- a/test/CodeGen/X86/inline-asm-x-scalar.ll
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -17,7 +17,7 @@ define void @test3() {
 
 define void @test4() {
         %tmp1 = tail call float asm "", "=x,0,~{dirflag},~{fpsr},~{flags}"( float 0x47EFFFFFE0000000 ); <float> [#uses=1]
-        %tmp4 = sub float %tmp1, 0x3810000000000000             ; <float> [#uses=1]
+        %tmp4 = fsub float %tmp1, 0x3810000000000000             ; <float> [#uses=1]
         tail call void asm sideeffect "", "x,~{dirflag},~{fpsr},~{flags}"( float %tmp4 )
         ret void
 }
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 275febaf7dca..2208b2d2e0e3 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -17,52 +17,52 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
 entry:
-	%0 = xor i32 %IA, 1		; <i32> [#uses=1]
-	%1 = xor i32 %IB, 1		; <i32> [#uses=1]
-	%2 = or i32 %1, %0		; <i32> [#uses=1]
-	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
-	br i1 %3, label %bb2, label %bb13
+      %0 = xor i32 %IA, 1		; <i32> [#uses=1]
+      %1 = xor i32 %IB, 1		; <i32> [#uses=1]
+      %2 = or i32 %1, %0		; <i32> [#uses=1]
+      %3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+      br i1 %3, label %bb2, label %bb13
 
 bb:		; preds = %bb3
-	%4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
-	%5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
-	%6 = mul float %4, %5		; <float> [#uses=1]
-	%7 = add float %6, %Sum0.0		; <float> [#uses=1]
-	%indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
-	br label %bb2
+      %4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
+      %5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
+      %6 = fmul float %4, %5		; <float> [#uses=1]
+      %7 = fadd float %6, %Sum0.0		; <float> [#uses=1]
+      %indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
+      br label %bb2
 
 bb2:		; preds = %entry, %bb
-	%B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
-	%Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
-	%indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
-	%N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
-	%A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
-	%B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
-	%8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
-	br i1 %8, label %bb3, label %bb4
+      %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
+      %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
+      %indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
+      %N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
+      %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
+      %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
+      %8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
+      br i1 %8, label %bb3, label %bb4
 
 bb3:		; preds = %bb2
-	%9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
-	%10 = and i64 %9, 15		; <i64> [#uses=1]
-	%11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
-	br i1 %11, label %bb4, label %bb
+      %9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
+      %10 = and i64 %9, 15		; <i64> [#uses=1]
+      %11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
+      br i1 %11, label %bb4, label %bb
 
 bb4:		; preds = %bb3, %bb2
-	%12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
-	%13 = and i64 %12, 15		; <i64> [#uses=1]
-	%14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
-	%15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
-	br i1 %14, label %bb6.preheader, label %bb10.preheader
+      %12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
+      %13 = and i64 %12, 15		; <i64> [#uses=1]
+      %14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
+      %15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
+      br i1 %14, label %bb6.preheader, label %bb10.preheader
 
 bb10.preheader:		; preds = %bb4
-	br i1 %15, label %bb9, label %bb12.loopexit
+      br i1 %15, label %bb9, label %bb12.loopexit
 
 bb6.preheader:		; preds = %bb4
-	br i1 %15, label %bb5, label %bb8.loopexit
+      br i1 %15, label %bb5, label %bb8.loopexit
 
 bb5:		; preds = %bb5, %bb6.preheader
-	%indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
-	%vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
+      %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
+      %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
 	%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=1]
 	%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=1]
 	%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=1]
@@ -78,8 +78,8 @@ bb5:		; preds = %bb5, %bb6.preheader
 	%17 = load <4 x float>* %16, align 16		; <<4 x float>> [#uses=1]
 	%18 = bitcast float* %B_addr.271 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%19 = load <4 x float>* %18, align 16		; <<4 x float>> [#uses=1]
-	%20 = mul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
-	%21 = add <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
+	%20 = fmul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
+	%21 = fadd <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
 	%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4		; <i64> [#uses=1]
 	%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163		; <i64> [#uses=2]
 	%22 = getelementptr float* %A, i64 %A_addr.0.sum175		; <float*> [#uses=1]
@@ -88,8 +88,8 @@ bb5:		; preds = %bb5, %bb6.preheader
 	%25 = getelementptr float* %B, i64 %A_addr.0.sum175		; <float*> [#uses=1]
 	%26 = bitcast float* %25 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%27 = load <4 x float>* %26, align 16		; <<4 x float>> [#uses=1]
-	%28 = mul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
-	%29 = add <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
+	%28 = fmul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
+	%29 = fadd <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
 	%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8		; <i64> [#uses=1]
 	%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161		; <i64> [#uses=2]
 	%30 = getelementptr float* %A, i64 %A_addr.0.sum174		; <float*> [#uses=1]
@@ -98,8 +98,8 @@ bb5:		; preds = %bb5, %bb6.preheader
 	%33 = getelementptr float* %B, i64 %A_addr.0.sum174		; <float*> [#uses=1]
 	%34 = bitcast float* %33 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%35 = load <4 x float>* %34, align 16		; <<4 x float>> [#uses=1]
-	%36 = mul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
-	%37 = add <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
+	%36 = fmul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
+	%37 = fadd <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
 	%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12		; <i64> [#uses=1]
 	%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159		; <i64> [#uses=2]
 	%38 = getelementptr float* %A, i64 %A_addr.0.sum173		; <float*> [#uses=1]
@@ -108,8 +108,8 @@ bb5:		; preds = %bb5, %bb6.preheader
 	%41 = getelementptr float* %B, i64 %A_addr.0.sum173		; <float*> [#uses=1]
 	%42 = bitcast float* %41 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%43 = load <4 x float>* %42, align 16		; <<4 x float>> [#uses=1]
-	%44 = mul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
-	%45 = add <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
+	%44 = fmul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
+	%45 = fadd <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
 	%.rec83 = add i64 %A_addr.273.rec, 16		; <i64> [#uses=1]
 	%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83		; <i64> [#uses=2]
 	%46 = getelementptr float* %A, i64 %A_addr.0.sum172		; <float*> [#uses=1]
@@ -132,8 +132,8 @@ bb7:		; preds = %bb7, %bb8.loopexit
 	%51 = load <4 x float>* %50, align 16		; <<4 x float>> [#uses=1]
 	%52 = bitcast float* %B_addr.359 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%53 = load <4 x float>* %52, align 16		; <<4 x float>> [#uses=1]
-	%54 = mul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
-	%55 = add <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
+	%54 = fmul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
+	%55 = fadd <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
 	%.rec85 = add i64 %A_addr.361.rec, 4		; <i64> [#uses=2]
 	%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
 	%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
@@ -185,23 +185,23 @@ bb9:		; preds = %bb9, %bb10.preheader
 	%71 = load <4 x float>* %70, align 1
 	%72 = bitcast float* %A_addr.440 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%73 = load <4 x float>* %72, align 16		; <<4 x float>> [#uses=1]
-	%74 = mul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
-	%75 = add <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
+	%74 = fmul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
+	%75 = fadd <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
 	%76 = getelementptr float* %A, i64 %B_addr.0.sum187		; <float*> [#uses=1]
 	%77 = bitcast float* %76 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%78 = load <4 x float>* %77, align 16		; <<4 x float>> [#uses=1]
-	%79 = mul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
-	%80 = add <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
+	%79 = fmul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
+	%80 = fadd <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
 	%81 = getelementptr float* %A, i64 %B_addr.0.sum186		; <float*> [#uses=1]
 	%82 = bitcast float* %81 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%83 = load <4 x float>* %82, align 16		; <<4 x float>> [#uses=1]
-	%84 = mul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
-	%85 = add <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
+	%84 = fmul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
+	%85 = fadd <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
 	%86 = getelementptr float* %A, i64 %B_addr.0.sum185		; <float*> [#uses=1]
 	%87 = bitcast float* %86 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%88 = load <4 x float>* %87, align 16		; <<4 x float>> [#uses=1]
-	%89 = mul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
-	%90 = add <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
+	%89 = fmul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
+	%90 = fadd <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
 	%.rec89 = add i64 %A_addr.440.rec, 16		; <i64> [#uses=1]
 	%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89		; <i64> [#uses=2]
 	%91 = getelementptr float* %A, i64 %A_addr.0.sum170		; <float*> [#uses=1]
@@ -224,8 +224,8 @@ bb11:		; preds = %bb11, %bb12.loopexit
 	%96 = load <4 x float>* %95, align 1
 	%97 = bitcast float* %A_addr.529 to <4 x float>*		; <<4 x float>*> [#uses=1]
 	%98 = load <4 x float>* %97, align 16		; <<4 x float>> [#uses=1]
-	%99 = mul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
-	%100 = add <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
+	%99 = fmul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
+	%100 = fadd <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
 	%.rec91 = add i64 %A_addr.529.rec, 4		; <i64> [#uses=2]
 	%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
 	%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
@@ -254,17 +254,17 @@ bb13:		; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
 	%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ]		; <float*> [#uses=1]
 	%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
 	%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ]		; <float*> [#uses=1]
-	%106 = add <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
-	%107 = add <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
-	%108 = add <4 x float> %106, %107		; <<4 x float>> [#uses=4]
+	%106 = fadd <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
+	%107 = fadd <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
+	%108 = fadd <4 x float> %106, %107		; <<4 x float>> [#uses=4]
 	%tmp23 = extractelement <4 x float> %108, i32 0		; <float> [#uses=1]
 	%tmp21 = extractelement <4 x float> %108, i32 1		; <float> [#uses=1]
-	%109 = add float %tmp23, %tmp21		; <float> [#uses=1]
+	%109 = fadd float %tmp23, %tmp21		; <float> [#uses=1]
 	%tmp19 = extractelement <4 x float> %108, i32 2		; <float> [#uses=1]
 	%tmp17 = extractelement <4 x float> %108, i32 3		; <float> [#uses=1]
-	%110 = add float %tmp19, %tmp17		; <float> [#uses=1]
-	%111 = add float %109, %110		; <float> [#uses=1]
-	%Sum0.254 = add float %111, %Sum0.1		; <float> [#uses=2]
+	%110 = fadd float %tmp19, %tmp17		; <float> [#uses=1]
+	%111 = fadd float %109, %110		; <float> [#uses=1]
+	%Sum0.254 = fadd float %111, %Sum0.1		; <float> [#uses=2]
 	%112 = icmp sgt i32 %N_addr.1, 0		; <i1> [#uses=1]
 	br i1 %112, label %bb.nph56, label %bb16
 
@@ -283,8 +283,8 @@ bb14:		; preds = %bb14, %bb.nph56
 	%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec		; <float*> [#uses=1]
 	%113 = load float* %A_addr.653, align 4		; <float> [#uses=1]
 	%114 = load float* %B_addr.652, align 4		; <float> [#uses=1]
-	%115 = mul float %113, %114		; <float> [#uses=1]
-	%Sum0.2 = add float %115, %Sum0.255		; <float> [#uses=2]
+	%115 = fmul float %113, %114		; <float> [#uses=1]
+	%Sum0.2 = fadd float %115, %Sum0.255		; <float> [#uses=2]
 	%indvar.next118 = add i64 %indvar117, 1		; <i64> [#uses=2]
 	%exitcond = icmp eq i64 %indvar.next118, %tmp.		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb16, label %bb14
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index e10253516ba5..0bf347c64271 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -20,16 +20,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 10
@@ -48,16 +48,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -77,17 +77,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 10
@@ -107,17 +107,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -136,16 +136,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -164,16 +164,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
@@ -193,17 +193,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -223,17 +223,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index 7ccfe855a6ad..639a7a6a3bb0 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -15,16 +15,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -43,16 +43,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 20
@@ -72,17 +72,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
@@ -102,17 +102,17 @@ loop:
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 20
@@ -131,16 +131,16 @@ loop:
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 1
         %exitcond = icmp eq i64 %indvar.next, %n
@@ -159,16 +159,16 @@ loop:
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 1
         %exitcond = icmp eq i64 %indvar.next, 10
@@ -188,17 +188,17 @@ loop:
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 1
         %exitcond = icmp eq i64 %indvar.next, %n
@@ -218,17 +218,17 @@ loop:
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 1
         %exitcond = icmp eq i64 %indvar.next, 10
@@ -247,16 +247,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
@@ -275,16 +275,16 @@ loop:
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 3
         %exitcond = icmp eq i64 %indvar.next, 10
@@ -303,16 +303,16 @@ loop:
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 3
         %exitcond = icmp eq i64 %indvar.next, 0
@@ -332,17 +332,17 @@ loop:
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 3
         %exitcond = icmp eq i64 %indvar.next, 10
@@ -362,17 +362,17 @@ loop:
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double* %d, i64 %indvar.i8
         %t1 = load double* %t0
-        %t2 = mul double %t1, 0.1
+        %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double* %d, i64 %indvar.i24
         %t4 = load double* %t3
-        %t5 = mul double %t4, 2.3
+        %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double* %d, i64 %indvar
         %t7 = load double* %t6
-        %t8 = mul double %t7, 4.5
+        %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 3
         %exitcond = icmp eq i64 %indvar.next, 0
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
index 2e754a8febc2..5f7a83f88458 100644
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -2,7 +2,7 @@
 
 define {i64, float} @bar(i64 %a, float %b) {
         %y = add i64 %a, 7
-        %z = add float %b, 7.0
+        %z = fadd float %b, 7.0
 	ret i64 %y, float %z
 }
 
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 55c76549ffe2..1a7ee085b5de 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -6,7 +6,7 @@
 
 define float @negfp(float %a, float %b) {
 entry:
-	%sub = sub float %a, %b		; <float> [#uses=1]
-	%neg = sub float -0.000000e+00, %sub		; <float> [#uses=1]
+	%sub = fsub float %a, %b		; <float> [#uses=1]
+	%neg = fsub float -0.000000e+00, %sub		; <float> [#uses=1]
 	ret float %neg
 }
 \ No newline at end of file
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 59a2bd09c18f..689639f5f06d 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -843,14 +843,14 @@ entry:
 	%8 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=0]
 	%9 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
 	%10 = load double* null, align 8		; <double> [#uses=2]
-	%11 = sub double -0.000000e+00, %10		; <double> [#uses=1]
+	%11 = fsub double -0.000000e+00, %10		; <double> [#uses=1]
 	%12 = load double* null, align 8		; <double> [#uses=2]
 	%13 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=1]
 	%14 = load double* %13, align 8		; <double> [#uses=2]
-	%15 = sub double -0.000000e+00, %14		; <double> [#uses=1]
+	%15 = fsub double -0.000000e+00, %14		; <double> [#uses=1]
 	%16 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=1]
 	%17 = load double* %16, align 8		; <double> [#uses=2]
-	%18 = sub double -0.000000e+00, %17		; <double> [#uses=1]
+	%18 = fsub double -0.000000e+00, %17		; <double> [#uses=1]
 	%19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
 	%20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
 	%21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6		; <double*> [#uses=0]
@@ -866,28 +866,28 @@ entry:
 	%31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
 	%32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1		; <double*> [#uses=1]
 	%33 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%34 = mul double %17, %5		; <double> [#uses=1]
-	%35 = add double 0.000000e+00, %34		; <double> [#uses=1]
-	%36 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%37 = mul double %14, %3		; <double> [#uses=1]
-	%38 = add double %36, %37		; <double> [#uses=1]
-	%39 = mul double %12, %4		; <double> [#uses=1]
-	%40 = add double %38, %39		; <double> [#uses=1]
-	%41 = mul double %5, %11		; <double> [#uses=1]
-	%42 = add double %40, %41		; <double> [#uses=2]
+	%34 = fmul double %17, %5		; <double> [#uses=1]
+	%35 = fadd double 0.000000e+00, %34		; <double> [#uses=1]
+	%36 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%37 = fmul double %14, %3		; <double> [#uses=1]
+	%38 = fadd double %36, %37		; <double> [#uses=1]
+	%39 = fmul double %12, %4		; <double> [#uses=1]
+	%40 = fadd double %38, %39		; <double> [#uses=1]
+	%41 = fmul double %5, %11		; <double> [#uses=1]
+	%42 = fadd double %40, %41		; <double> [#uses=2]
 	store double %42, double* %32, align 8
-	%43 = mul double %2, %15		; <double> [#uses=1]
-	%44 = add double %43, 0.000000e+00		; <double> [#uses=1]
-	%45 = mul double %3, %18		; <double> [#uses=1]
-	%46 = add double %44, %45		; <double> [#uses=1]
-	%47 = mul double %10, %4		; <double> [#uses=1]
-	%48 = add double %46, %47		; <double> [#uses=1]
-	%49 = mul double %12, %5		; <double> [#uses=1]
-	%50 = add double %48, %49		; <double> [#uses=2]
+	%43 = fmul double %2, %15		; <double> [#uses=1]
+	%44 = fadd double %43, 0.000000e+00		; <double> [#uses=1]
+	%45 = fmul double %3, %18		; <double> [#uses=1]
+	%46 = fadd double %44, %45		; <double> [#uses=1]
+	%47 = fmul double %10, %4		; <double> [#uses=1]
+	%48 = fadd double %46, %47		; <double> [#uses=1]
+	%49 = fmul double %12, %5		; <double> [#uses=1]
+	%50 = fadd double %48, %49		; <double> [#uses=2]
 	store double %50, double* %33, align 8
-	%51 = mul double %35, 2.000000e+00		; <double> [#uses=1]
-	%52 = mul double %42, 2.000000e+00		; <double> [#uses=1]
-	%53 = mul double %50, 2.000000e+00		; <double> [#uses=1]
+	%51 = fmul double %35, 2.000000e+00		; <double> [#uses=1]
+	%52 = fmul double %42, 2.000000e+00		; <double> [#uses=1]
+	%53 = fmul double %50, 2.000000e+00		; <double> [#uses=1]
 	%54 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %51, double* %54, align 8
 	%55 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 39c62973def9..8cc1bec2d1f9 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -5,8 +5,8 @@ declare double @sin(double %f)
 
 define double @foo(double %e)
 {
-  %f = sub double 0.0, %e
+  %f = fsub double 0.0, %e
   %g = call double @sin(double %f)
-  %h = sub double 0.0, %g
+  %h = fsub double 0.0, %g
   ret double %h
 }
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index a95b56404438..8dcd23ae735d 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -11,7 +11,7 @@ bb:
 	%i.03 = add i64 %indvar, %n
 	%0 = getelementptr double* %d, i64 %i.03
 	%1 = load double* %0, align 8
-	%2 = mul double %1, 3.000000e+00
+	%2 = fmul double %1, 3.000000e+00
 	store double %2, double* %0, align 8
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 0
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index b4698e37b42a..85e3bf251133 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -11,7 +11,7 @@ bb:
 	%i.03 = sub i32 %n, %indvar
 	%1 = getelementptr double* %p, i32 %i.03
 	%2 = load double* %1, align 4
-	%3 = mul double %2, 2.930000e+00
+	%3 = fmul double %2, 2.930000e+00
 	store double %3, double* %1, align 4
 	%4 = add i32 %i.03, -1
 	%phitmp = icmp slt i32 %4, 0
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 789a4bae32b7..3bbc55da16ab 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -8,16 +8,16 @@ entry:
 	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
 	%2 = trunc i64 %p2.0 to i32		; <i32> [#uses=1]
 	%3 = sitofp i32 %2 to double		; <double> [#uses=1]
-	%4 = add double %1, %3		; <double> [#uses=1]
-	%5 = mul double %4, 5.000000e-01		; <double> [#uses=1]
+	%4 = fadd double %1, %3		; <double> [#uses=1]
+	%5 = fmul double %4, 5.000000e-01		; <double> [#uses=1]
 	%6 = lshr i64 %p1.0, 32		; <i64> [#uses=1]
 	%7 = trunc i64 %6 to i32		; <i32> [#uses=1]
 	%8 = sitofp i32 %7 to double		; <double> [#uses=1]
 	%9 = lshr i64 %p2.0, 32		; <i64> [#uses=1]
 	%10 = trunc i64 %9 to i32		; <i32> [#uses=1]
 	%11 = sitofp i32 %10 to double		; <double> [#uses=1]
-	%12 = add double %8, %11		; <double> [#uses=1]
-	%13 = mul double %12, 5.000000e-01		; <double> [#uses=1]
+	%12 = fadd double %8, %11		; <double> [#uses=1]
+	%13 = fmul double %12, 5.000000e-01		; <double> [#uses=1]
 	%mrv3 = insertvalue %struct.dpoint undef, double %5, 0		; <%struct.dpoint> [#uses=1]
 	%mrv4 = insertvalue %struct.dpoint %mrv3, double %13, 1		; <%struct.dpoint> [#uses=1]
 	ret %struct.dpoint %mrv4
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 3f6c3652ff58..96976b8e466a 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -12,9 +12,9 @@ entry:
 	%tmp1 = load float* %tmp		; <float> [#uses=1]
 	%tmp2 = getelementptr %struct.anon* %p, i32 0, i32 1		; <float*> [#uses=1]
 	%tmp3 = load float* %tmp2		; <float> [#uses=1]
-	%neg = sub float -0.000000e+00, %tmp1		; <float> [#uses=1]
+	%neg = fsub float -0.000000e+00, %tmp1		; <float> [#uses=1]
 	%conv = fpext float %neg to double		; <double> [#uses=1]
-	%neg4 = sub float -0.000000e+00, %tmp3		; <float> [#uses=1]
+	%neg4 = fsub float -0.000000e+00, %tmp3		; <float> [#uses=1]
 	%conv5 = fpext float %neg4 to double		; <double> [#uses=1]
 	%call = call i32 (...)* @printf( i8* getelementptr ([17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 )		; <i32> [#uses=0]
 	ret void
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index a1ed0c2c9824..73f51018817a 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -22,7 +22,7 @@ bb:		; preds = %entry
 bb19:		; preds = %bb, %entry
 	%data15.0 = phi double* [ %7, %bb ], [ %3, %entry ]		; <double*> [#uses=5]
 	%8 = sitofp i32 %len to double		; <double> [#uses=1]
-	%9 = sub double %8, 1.000000e+00		; <double> [#uses=1]
+	%9 = fsub double %8, 1.000000e+00		; <double> [#uses=1]
 	%10 = fdiv double 2.000000e+00, %9		; <double> [#uses=1]
 	store double %10, double* %c, align 8
 	%11 = ashr i32 %len, 1		; <i32> [#uses=3]
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index 36d4a5d554f5..d4a98103ecc5 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -6,9 +6,9 @@ define void @foo(double* nocapture %P) nounwind {
 entry:
 	%0 = tail call double (...)* @test() nounwind		; <double> [#uses=2]
 	%1 = tail call double (...)* @test() nounwind		; <double> [#uses=2]
-	%2 = mul double %0, %0		; <double> [#uses=1]
-	%3 = mul double %1, %1		; <double> [#uses=1]
-	%4 = add double %2, %3		; <double> [#uses=1]
+	%2 = fmul double %0, %0		; <double> [#uses=1]
+	%3 = fmul double %1, %1		; <double> [#uses=1]
+	%4 = fadd double %2, %3		; <double> [#uses=1]
 	store double %4, double* %P, align 8
 	ret void
 }
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index 99a46b6e5fd2..4f9a5820e043 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -5,17 +5,17 @@
 define void @test(double* %P, i32 %cond) nounwind {
 entry:
 	%0 = load double* %P, align 8		; <double> [#uses=1]
-	%1 = add double %0, 4.000000e+00		; <double> [#uses=2]
+	%1 = fadd double %0, 4.000000e+00		; <double> [#uses=2]
 	%2 = icmp eq i32 %cond, 0		; <i1> [#uses=1]
 	br i1 %2, label %bb1, label %bb
 
 bb:		; preds = %entry
-	%3 = add double %1, 4.000000e+00		; <double> [#uses=1]
+	%3 = fadd double %1, 4.000000e+00		; <double> [#uses=1]
 	br label %bb1
 
 bb1:		; preds = %bb, %entry
 	%A.0 = phi double [ %3, %bb ], [ %1, %entry ]		; <double> [#uses=1]
-	%4 = mul double %A.0, 4.000000e+00		; <double> [#uses=1]
+	%4 = fmul double %A.0, 4.000000e+00		; <double> [#uses=1]
 	%5 = tail call i32 (...)* @bar() nounwind		; <i32> [#uses=0]
 	store double %4, double* %P, align 8
 	ret void
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index c3e18c4723c1..60297e9a5dc6 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -7,9 +7,9 @@ entry:
 bb14.i:		; preds = %bb14.i, %entry
 	%i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %0, %bb14.i ]		; <i32> [#uses=1]
 	%0 = add i32 %i8.0.reg2mem.0.i, 1		; <i32> [#uses=2]
-	%1 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%2 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%3 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%1 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%2 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%3 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	%exitcond75.i = icmp eq i32 %0, 32		; <i1> [#uses=1]
 	br i1 %exitcond75.i, label %bb24.i, label %bb14.i
 
@@ -37,13 +37,13 @@ bb7.i.i:		; preds = %bb35.preheader.i, %bb5.i.i
 	br label %bb35.preheader.i
 
 bb35.preheader.i:		; preds = %bb7.i.i, %bb33.i
-	%9 = sub double 0.000000e+00, %4		; <double> [#uses=1]
+	%9 = fsub double 0.000000e+00, %4		; <double> [#uses=1]
 	store double %9, double* null, align 8
-	%10 = sub double 0.000000e+00, %5		; <double> [#uses=1]
+	%10 = fsub double 0.000000e+00, %5		; <double> [#uses=1]
 	store double %10, double* null, align 8
-	%11 = sub double 0.000000e+00, %6		; <double> [#uses=1]
+	%11 = fsub double 0.000000e+00, %6		; <double> [#uses=1]
 	store double %11, double* null, align 8
-	%12 = sub double 0.000000e+00, %7		; <double> [#uses=1]
+	%12 = fsub double 0.000000e+00, %7		; <double> [#uses=1]
 	store double %12, double* null, align 8
 	br i1 false, label %bb7.i.i, label %bb5.i.i
 }
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index 97401b3e7d56..a570f7304f37 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -10,14 +10,14 @@ bb:		; preds = %bb, %entry
 	%Flint.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %5, %bb ]		; <double> [#uses=1]
 	%twoThrd.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=1]
 	%0 = tail call double @llvm.pow.f64(double 0x3FE5555555555555, double 0.000000e+00)		; <double> [#uses=1]
-	%1 = add double %0, %twoThrd.0.reg2mem.0		; <double> [#uses=1]
+	%1 = fadd double %0, %twoThrd.0.reg2mem.0		; <double> [#uses=1]
 	%2 = tail call double @sin(double %k.0.reg2mem.0) nounwind readonly		; <double> [#uses=1]
-	%3 = mul double 0.000000e+00, %2		; <double> [#uses=1]
+	%3 = fmul double 0.000000e+00, %2		; <double> [#uses=1]
 	%4 = fdiv double 1.000000e+00, %3		; <double> [#uses=1]
         store double %Flint.0.reg2mem.0, double* null
         store double %twoThrd.0.reg2mem.0, double* null
-	%5 = add double %4, %Flint.0.reg2mem.0		; <double> [#uses=1]
-	%6 = add double %k.0.reg2mem.0, 1.000000e+00		; <double> [#uses=1]
+	%5 = fadd double %4, %Flint.0.reg2mem.0		; <double> [#uses=1]
+	%6 = fadd double %k.0.reg2mem.0, 1.000000e+00		; <double> [#uses=1]
 	br label %bb
 }
 
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index d3538250b3c5..b83003f30fea 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -40,7 +40,7 @@ bb28:		; preds = %bb14
 
 bb30:		; preds = %bb36, %bb28
 	%m.1.reg2mem.0 = phi i32 [ %m.0, %bb36 ], [ 0, %bb28 ]		; <i32> [#uses=1]
-	%1 = mul double 0.000000e+00, %0		; <double> [#uses=1]
+	%1 = fmul double 0.000000e+00, %0		; <double> [#uses=1]
 	%2 = fptosi double %1 to i32		; <i32> [#uses=1]
 	br i1 false, label %bb36, label %bb35
 
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index 780822372f64..e771b8067c21 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -20,14 +20,14 @@ bb.nph:		; preds = %entry
 bb9.i:		; preds = %bb.nph
 	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
 	%4 = fdiv double 1.000000e+00, %1		; <double> [#uses=1]
-	%5 = mul double %4, 0.000000e+00		; <double> [#uses=1]
+	%5 = fmul double %4, 0.000000e+00		; <double> [#uses=1]
 	%6 = tail call double @asin(double %5) nounwind readonly		; <double> [#uses=0]
 	unreachable
 
 bb13.i:		; preds = %bb.nph
 	%7 = fdiv double 1.000000e+00, %1		; <double> [#uses=1]
 	%8 = tail call double @sin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
-	%9 = mul double %7, %8		; <double> [#uses=1]
+	%9 = fmul double %7, %8		; <double> [#uses=1]
 	%10 = tail call double @asin(double %9) nounwind readonly		; <double> [#uses=0]
 	unreachable
 
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index 7f7b933e8a93..cd9d205a7138 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -17,15 +17,15 @@ entry:
 
 bb:		; preds = %bb, %entry
 	%0 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
-	%1 = add double 0.000000e+00, %0		; <double> [#uses=2]
+	%1 = fadd double 0.000000e+00, %0		; <double> [#uses=2]
 	%2 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
-	%3 = sub double %1, %2		; <double> [#uses=2]
+	%3 = fsub double %1, %2		; <double> [#uses=2]
 	store double %3, double* @axis_slope_angle, align 8
 	%4 = fdiv double %1, 2.000000e+00		; <double> [#uses=1]
 	%5 = tail call double @sin(double %4) nounwind readonly		; <double> [#uses=1]
-	%6 = mul double 0.000000e+00, %5		; <double> [#uses=1]
+	%6 = fmul double 0.000000e+00, %5		; <double> [#uses=1]
 	%7 = tail call double @tan(double %3) nounwind readonly		; <double> [#uses=0]
-	%8 = add double 0.000000e+00, %6		; <double> [#uses=1]
+	%8 = fadd double 0.000000e+00, %6		; <double> [#uses=1]
 	store double %8, double* @object_distance, align 8
 	br label %bb
 
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index eb6d49ff4a0f..22598195ed12 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -19,12 +19,12 @@ bb:		; preds = %bb9.i, %entry
 	br i1 %1, label %bb9.i, label %bb13.i
 
 bb9.i:		; preds = %bb
-	%2 = sub double %.rle4, %0		; <double> [#uses=0]
+	%2 = fsub double %.rle4, %0		; <double> [#uses=0]
 	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
-	%4 = mul double 0.000000e+00, %0		; <double> [#uses=1]
+	%4 = fmul double 0.000000e+00, %0		; <double> [#uses=1]
 	%5 = tail call double @tan(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
-	%6 = mul double %4, 0.000000e+00		; <double> [#uses=1]
-	%7 = add double %6, 0.000000e+00		; <double> [#uses=1]
+	%6 = fmul double %4, 0.000000e+00		; <double> [#uses=1]
+	%7 = fadd double %6, 0.000000e+00		; <double> [#uses=1]
 	br i1 false, label %return, label %bb
 
 bb13.i:		; preds = %bb
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index bfafe85b8c67..1be960f53a54 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -21,13 +21,13 @@ bb:		; preds = %bb9.i, %entry
 	br i1 %1, label %bb9.i, label %bb13.i
 
 bb9.i:		; preds = %bb
-	%2 = sub double %.rle4, %0		; <double> [#uses=0]
+	%2 = fsub double %.rle4, %0		; <double> [#uses=0]
 	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
 	%4 = tail call double @sin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
-	%5 = mul double %4, %0		; <double> [#uses=1]
+	%5 = fmul double %4, %0		; <double> [#uses=1]
 	%6 = tail call double @tan(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
-	%7 = mul double %5, 0.000000e+00		; <double> [#uses=1]
-	%8 = add double %7, 0.000000e+00		; <double> [#uses=1]
+	%7 = fmul double %5, 0.000000e+00		; <double> [#uses=1]
+	%8 = fadd double %7, 0.000000e+00		; <double> [#uses=1]
 	br i1 false, label %return, label %bb
 
 bb13.i:		; preds = %bb
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
new file mode 100644
index 000000000000..dea7d7eb0ea4
--- /dev/null
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep subq %t | count 1
+; RUN: grep addq %t | count 1
+
+define x86_fp80 @f0(float %f) nounwind readnone noredzone {
+entry:
+	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %0
+}
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index d9ef6fe76d7d..4c983b014883 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static -aggressive-remat | grep xmm | count 2
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -aggressive-remat | grep xmm | count 2
 
 declare void @bar() nounwind
 
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
index 966e69ec7798..3406aeeeb5c5 100644
--- a/test/CodeGen/X86/shrink-fp-const1.ll
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -2,6 +2,6 @@
 ; PR1264
 
 define double @foo(double %x) {
-        %y = mul double %x, 5.000000e-01
+        %y = fmul double %x, 5.000000e-01
         ret double %y
 }
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index dedd948c297b..8b87f7449cde 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -8,7 +8,7 @@ entry:
 	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
 	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
 	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
+	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
 	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
 	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
 	%tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index 7fa8fed33c99..0c697def1ec2 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -22,6 +22,6 @@ declare void @llvm.va_end(i8*) nounwind
 
 define float @t2(float %a, float %b) nounwind readnone {
 entry:
-	%0 = add float %a, %b		; <float> [#uses=1]
+	%0 = fadd float %a, %b		; <float> [#uses=1]
 	ret float %0
 }
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 39debaa3257f..5a888b2e784b 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -2,11 +2,11 @@
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p
-  %z = mul <4 x float> %t, %x
+  %z = fmul <4 x float> %t, %x
   ret <4 x float> %z
 }
 define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
   %t = load <2 x double>* %p
-  %z = mul <2 x double> %t, %x
+  %z = fmul <2 x double> %t, %x
   ret <2 x double> %z
 }
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index b5b261d20ab4..ba693a200151 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -2,11 +2,11 @@
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p, align 4
-  %z = mul <4 x float> %t, %x
+  %z = fmul <4 x float> %t, %x
   ret <4 x float> %z
 }
 define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
   %t = load <2 x double>* %p, align 8
-  %z = mul <2 x double> %t, %x
+  %z = fmul <2 x double> %t, %x
   ret <2 x double> %z
 }
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index cff1f7fa86f6..d8c32831a1e9 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -6,7 +6,7 @@ define float @tst1(float %a, float %b) {
 }
 
 define double @tst2(double %a, float %b, float %c) {
-	%tmp1 = add float %b, %c
+	%tmp1 = fadd float %b, %c
 	%tmp2 = fpext float %tmp1 to double
 	%tmp = tail call double @copysign( double %a, double %tmp2 )
 	ret double %tmp
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
index fc0df060623f..470d14671596 100644
--- a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
@@ -6,7 +6,7 @@
 
 define float @bar(<4 x float> %v) {
   %s = extractelement <4 x float> %v, i32 3
-  %t = add float %s, 1.0
+  %t = fadd float %s, 1.0
   ret float %t
 }
 define float @baz(<4 x float> %v) {
diff --git a/test/CodeGen/X86/sse41-pmovx.ll b/test/CodeGen/X86/sse41-pmovx.ll
index 71e5e254121c..c8cfec913cd8 100644
--- a/test/CodeGen/X86/sse41-pmovx.ll
+++ b/test/CodeGen/X86/sse41-pmovx.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-as < %s | llc -march=x86    -mattr=sse41 | not grep movq
 ; RUN: llvm-as < %s | llc -march=x86    -mattr=sse41 | grep pmovsxbd
 ; RUN: llvm-as < %s | llc -march=x86    -mattr=sse41 | grep pmovsxwd
-; RUN: llvm-as < %s | llc -march=x86    -mattr=sse41 | grep pmovsxbq
+; RUN: llvm-as < %s | llc -march=x86    -mattr=sse41 | grep pmovzxbq
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=sse41 -mtriple=x86_64-apple-darwin | grep movq | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=sse41 -mtriple=x86_64-unknown-linux-gnu | not grep movq
 
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 1e6c2b2af060..dda6f0d928c9 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -14,7 +14,7 @@ entry:
 	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
 	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
 	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
-	%tmp6 = add double %tmp4, %tmp2		; <double> [#uses=1]
+	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
 	store double %tmp6, double* %P, align 8
 	ret void
 }
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
index 655cbd68b37a..945cf48f9bde 100644
--- a/test/CodeGen/X86/storetrunc-fp.ll
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | llc -march=x86 | not grep flds
 
 define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
-	%c = add x86_fp80 %a, %b
+	%c = fadd x86_fp80 %a, %b
 	%d = fptrunc x86_fp80 %c to float
 	store float %d, float* %fp
 	ret void
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 97f33d8adbcb..277a4430acaa 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -14,7 +14,7 @@ bb:
 	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
 	%tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0
 	%tmp3 = load float* %tmp2, align 4
-	%tmp4 = mul float %tmp3, 2.000000e+00
+	%tmp4 = fmul float %tmp3, 2.000000e+00
 	%tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0
 	store float %tmp4, float* %tmp5, align 4
 	%tmp8 = shl i32 %i.019.0, 1
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 9a011f7bac8b..3fe4cd1b781a 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -9,7 +9,7 @@ entry:
 	%tmp.i3 = bitcast <2 x double> %B to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458>		; <<2 x i64>> [#uses=1]
 	%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double>		; <<2 x double>> [#uses=1]
-	%tmp.i2 = add <2 x double> %tmp3.i, %A		; <<2 x double>> [#uses=1]
-	%tmp.i = add <2 x double> %tmp.i2, %C		; <<2 x double>> [#uses=1]
+	%tmp.i2 = fadd <2 x double> %tmp3.i, %A		; <<2 x double>> [#uses=1]
+	%tmp.i = fadd <2 x double> %tmp.i2, %C		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp.i
 }
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index f1f009ec814f..ee7567cf7609 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -6,7 +6,7 @@
 
 define void @test1(<4 x float>* %F, float* %f) nounwind {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp7 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 0		; <float> [#uses=1]
 	store float %tmp2, float* %f
 	ret void
@@ -14,7 +14,7 @@ define void @test1(<4 x float>* %F, float* %f) nounwind {
 
 define float @test2(<4 x float>* %F, float* %f) nounwind {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp7 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 2		; <float> [#uses=1]
 	ret float %tmp2
 }
@@ -29,7 +29,7 @@ define void @test3(float* %R, <4 x float>* %P1) nounwind {
 define double @test4(double %A) nounwind {
 	%tmp1 = call <2 x double> @foo( )		; <<2 x double>> [#uses=1]
 	%tmp2 = extractelement <2 x double> %tmp1, i32 1		; <double> [#uses=1]
-	%tmp3 = add double %tmp2, %A		; <double> [#uses=1]
+	%tmp3 = fadd double %tmp2, %A		; <double> [#uses=1]
 	ret double %tmp3
 }
 
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index 03765d681cc3..a801472622f2 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,11 +1,11 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
 
 define <4 x float> @t1(<4 x float> %Q) {
-        %tmp15 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
+        %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
 	ret <4 x float> %tmp15
 }
 
 define <4 x float> @t2(<4 x float> %Q) {
-        %tmp15 = sub <4 x float> zeroinitializer, %Q
+        %tmp15 = fsub <4 x float> zeroinitializer, %Q
 	ret <4 x float> %tmp15
 }
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index 86f13069de29..788283957585 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -7,9 +7,9 @@
 define void @test(<4 x float>* %F, float %f) {
 entry:
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0		; <<4 x float>> [#uses=2]
-	%tmp6 = add <4 x float> %tmp10, %tmp10		; <<4 x float>> [#uses=1]
+	%tmp6 = fadd <4 x float> %tmp10, %tmp10		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 }
@@ -18,12 +18,12 @@ define void @test2(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	store float %f, float* %tmp.upgrd.1
 	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
-	%tmp6 = add <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
+	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 }
@@ -32,7 +32,7 @@ define void @test3(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	%tmp.upgrd.3 = load float* %tmp.upgrd.2		; <float> [#uses=1]
@@ -45,7 +45,7 @@ entry:
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp5.lhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
 	%tmp5.rhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
-	%tmp5 = add float %tmp5.lhs, %tmp5.rhs		; <float> [#uses=1]
+	%tmp5 = fadd float %tmp5.lhs, %tmp5.rhs		; <float> [#uses=1]
 	store float %tmp5, float* %f
 	ret void
 }
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index e032c5b8549c..3a9464ceff12 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -5,7 +5,7 @@ define void @test(<4 x float>* %F, i32 %I) {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
 	%f = sitofp i32 %I to float		; <float> [#uses=1]
 	%tmp1 = insertelement <4 x float> %tmp, float %f, i32 0		; <<4 x float>> [#uses=2]
-	%tmp18 = add <4 x float> %tmp1, %tmp1		; <<4 x float>> [#uses=1]
+	%tmp18 = fadd <4 x float> %tmp1, %tmp1		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp18, <4 x float>* %F
 	ret void
 }
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index 6e03afb94a80..f8957629f8a1 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -4,7 +4,7 @@
 ; RUN: grep movaps %t | count 2
 
 define void @t(<4 x float> %A) {
-	%tmp1277 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A
+	%tmp1277 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A
 	store <4 x float> %tmp1277, <4 x float>* null
 	ret void
 }
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
index ede7ab26700c..ecb825b00bd8 100644
--- a/test/CodeGen/X86/vec_select.ll
+++ b/test/CodeGen/X86/vec_select.ll
@@ -3,7 +3,7 @@
 define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
         %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
         %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=2]
-        %tmp9 = mul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
+        %tmp9 = fmul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
         %tmp.upgrd.1 = icmp eq i32 %C, 0                ; <i1> [#uses=1]
         %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp               ; <<4 x float>> [#uses=1]
         store <4 x float> %iftmp.38.0, <4 x float>* %A
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 6baf47a0af2a..231ac0c3b834 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -10,8 +10,8 @@ target triple = "i686-apple-cl.1.0"
 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
 entry:
 	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
-	%sub = sub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
-	%mul = mul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
-	%add = add <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
+	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
+	%mul = fmul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
+	%add = fadd <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
 	ret <8 x float> %add
 }
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 6de103808066..018b4cf1a06b 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -5,8 +5,8 @@
 define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
         %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
         %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
-        %tmp9 = add <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
-        %tmp21 = sub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
+        %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
+        %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 }
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index 1acd73fcba39..e356f2456b75 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -6,7 +6,7 @@ define void @test() nounwind {
         %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
         %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
-        %tmp4 = add <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
+        %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp4, <4 x float>* null
         ret void
 }
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 64222e40ff23..89914fda63ba 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -7,7 +7,7 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2		; <<4 x float>> [#uses=1]
 	%tmp6 = insertelement <4 x float> %tmp4, float %X, i32 3		; <<4 x float>> [#uses=1]
 	%tmp8 = load <4 x float>* %Q		; <<4 x float>> [#uses=1]
-	%tmp10 = mul <4 x float> %tmp8, %tmp6		; <<4 x float>> [#uses=1]
+	%tmp10 = fmul <4 x float> %tmp8, %tmp6		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp10, <4 x float>* %P
 	ret void
 }
@@ -16,7 +16,7 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
 	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
 	%tmp2 = insertelement <2 x double> %tmp, double %X, i32 1		; <<2 x double>> [#uses=1]
 	%tmp4 = load <2 x double>* %Q		; <<2 x double>> [#uses=1]
-	%tmp6 = mul <2 x double> %tmp4, %tmp2		; <<2 x double>> [#uses=1]
+	%tmp6 = fmul <2 x double> %tmp4, %tmp2		; <<2 x double>> [#uses=1]
 	store <2 x double> %tmp6, <2 x double>* %P
 	ret void
 }
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 0f15f923fea6..69900a686e25 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -20,8 +20,8 @@ define i16 @test1(float %f) nounwind {
 }
 
 define i16 @test2(float %f) nounwind {
-	%tmp28 = sub float %f, 1.000000e+00		; <float> [#uses=1]
-	%tmp37 = mul float %tmp28, 5.000000e-01		; <float> [#uses=1]
+	%tmp28 = fsub float %f, 1.000000e+00		; <float> [#uses=1]
+	%tmp37 = fmul float %tmp28, 5.000000e-01		; <float> [#uses=1]
 	%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0		; <<4 x float>> [#uses=1]
 	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
 	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 17b378f39203..0a7a54341269 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -2,7 +2,7 @@
 
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
-        %S = add <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
+        %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
         store <4 x float> %S, <4 x float>* %P
         ret void
 }
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index e7f3b922013c..8e1de2f62cb0 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -15,7 +15,7 @@
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
         %p = load %f1* %P               ; <%f1> [#uses=1]
         %q = load %f1* %Q               ; <%f1> [#uses=1]
-        %R = add %f1 %p, %q             ; <%f1> [#uses=1]
+        %R = fadd %f1 %p, %q             ; <%f1> [#uses=1]
         store %f1 %R, %f1* %S
         ret void
 }
@@ -23,7 +23,7 @@ define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
         %p = load %f2* %P               ; <%f2> [#uses=1]
         %q = load %f2* %Q               ; <%f2> [#uses=1]
-        %R = add %f2 %p, %q             ; <%f2> [#uses=1]
+        %R = fadd %f2 %p, %q             ; <%f2> [#uses=1]
         store %f2 %R, %f2* %S
         ret void
 }
@@ -31,7 +31,7 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
         %q = load %f4* %Q               ; <%f4> [#uses=1]
-        %R = add %f4 %p, %q             ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, %q             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
@@ -39,7 +39,7 @@ define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
         %p = load %f8* %P               ; <%f8> [#uses=1]
         %q = load %f8* %Q               ; <%f8> [#uses=1]
-        %R = add %f8 %p, %q             ; <%f8> [#uses=1]
+        %R = fadd %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
@@ -47,7 +47,7 @@ define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
         %p = load %f8* %P               ; <%f8> [#uses=1]
         %q = load %f8* %Q               ; <%f8> [#uses=1]
-        %R = mul %f8 %p, %q             ; <%f8> [#uses=1]
+        %R = fmul %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
@@ -64,21 +64,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
 
 define void @test_cst(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_zero(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
         %p = load %f4* %P               ; <%f4> [#uses=1]
-        %R = add %f4 %p, undef          ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, undef          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
@@ -115,7 +115,7 @@ define double @test_extract_elt2(%d8* %P) {
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
         %tmp = load %f4* %b             ; <%f4> [#uses=1]
-        %tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
+        %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
         %tmp3 = bitcast %f4 %tmp2 to %i4                ; <%i4> [#uses=1]
         %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >           ; <%i4> [#uses=1]
         store %i4 %tmp4, %i4* %a
@@ -138,7 +138,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
         %q = load %f4* %Q               ; <%f4> [#uses=1]
-        %R = add %f4 %q, %tmp6          ; <%f4> [#uses=1]
+        %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
 }
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 59548c333622..7b0bb33c0024 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -34,8 +34,8 @@ forbody:		; preds = %forcond
 	%arrayidx6 = getelementptr <3 x float>* %tmp5, i32 %tmp4		; <<3 x float>*> [#uses=1]
 	%tmp7 = load <3 x float>* %arrayidx6		; <<3 x float>> [#uses=1]
 	%tmp8 = load <3 x float>* %v		; <<3 x float>> [#uses=1]
-	%mul = mul <3 x float> %tmp7, %tmp8		; <<3 x float>> [#uses=1]
-	%add = add <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 >		; <<3 x float>> [#uses=1]
+	%mul = fmul <3 x float> %tmp7, %tmp8		; <<3 x float>> [#uses=1]
+	%add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 >		; <<3 x float>> [#uses=1]
 	store <3 x float> %add, <3 x float>* %arrayidx
 	br label %forinc
 
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index d3bbd5f9ede5..a676f33d6c68 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -5,7 +5,7 @@
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
-	%val = add <3 x float> %x, %src2;
+	%val = fadd <3 x float> %x, %src2;
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
 }
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index d25e02e6de38..c2dfa3d272c3 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -5,7 +5,7 @@
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
-	%val = add <3 x float> %x, %src2;
+	%val = fadd <3 x float> %x, %src2;
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
 }
diff --git a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
index 735e988e91c8..b9333c94abe3 100644
--- a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -8,11 +8,11 @@ bb113:		; preds = %entry
 	ret double 0.000000e+00
 
 bb129:		; preds = %entry
-	%tmp134 = sub double %b, %a		; <double> [#uses=1]
-	%tmp136 = sub double %tmp134, %c		; <double> [#uses=1]
-	%tmp138 = add double %tmp136, %d		; <double> [#uses=1]
-	%tmp140 = sub double %tmp138, %e		; <double> [#uses=1]
-	%tmp142 = add double %tmp140, %f		; <double> [#uses=1]
-	%tmp.0 = mul double %tmp142, 0.000000e+00		; <double> [#uses=1]
+	%tmp134 = fsub double %b, %a		; <double> [#uses=1]
+	%tmp136 = fsub double %tmp134, %c		; <double> [#uses=1]
+	%tmp138 = fadd double %tmp136, %d		; <double> [#uses=1]
+	%tmp140 = fsub double %tmp138, %e		; <double> [#uses=1]
+	%tmp142 = fadd double %tmp140, %f		; <double> [#uses=1]
+	%tmp.0 = fmul double %tmp142, 0.000000e+00		; <double> [#uses=1]
 	ret double %tmp.0
 }
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index e4426fd908f3..3fb7b0186940 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -2,7 +2,7 @@
 ; RUN: grep "xor" %t1.s | count 1
 define i1 @test(double %F) nounwind {
 entry:
-	%0 = sub double -0.000000e+00, %F
+	%0 = fsub double -0.000000e+00, %F
 	%1 = fcmp olt double 0.000000e+00, %0
 	ret i1 %1
 }
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 628be16e3191..30f93309d5cf 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -2,10 +2,10 @@
 ; RUN: lli %t.bc > /dev/null
 
 define i32 @main() {
-	%X = add double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
-	%Y = sub double 0.000000e+00, 1.000000e+00		; <double> [#uses=2]
+	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
+	%Y = fsub double 0.000000e+00, 1.000000e+00		; <double> [#uses=2]
 	%Z = fcmp oeq double %X, %Y		; <i1> [#uses=0]
-	add double %Y, 0.000000e+00		; <double>:1 [#uses=0]
+	fadd double %Y, 0.000000e+00		; <double>:1 [#uses=0]
 	ret i32 0
 }
 
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index a119b40c34a9..2e8ecd5a740f 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -3,13 +3,13 @@
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
-	%V = add double %D, 1.000000e+00		; <double> [#uses=2]
-	%W = sub double %V, %V		; <double> [#uses=3]
-	%X = mul double %W, %W		; <double> [#uses=2]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
 	%Y = fdiv double %X, %X		; <double> [#uses=2]
 	%Z = frem double %Y, %Y		; <double> [#uses=3]
 	%Z1 = fdiv double %Z, %W		; <double> [#uses=0]
-	%Q = add double %Z, %Arg		; <double> [#uses=1]
+	%Q = fadd double %Z, %Arg		; <double> [#uses=1]
 	%R = bitcast double %Q to double		; <double> [#uses=1]
 	store double %R, double* %DP
 	ret double %Z
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index 235c4027b996..b917693abd46 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -3,10 +3,10 @@
 
 
 define i32 @main() {
-	%double1 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
-	%double2 = add double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
-	%float1 = add float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
-	%float2 = add float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%double1 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%double2 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%float1 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%float2 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
 	%test49 = fcmp oeq float %float1, %float2		; <i1> [#uses=0]
 	%test50 = fcmp oge float %float1, %float2		; <i1> [#uses=0]
 	%test51 = fcmp ogt float %float1, %float2		; <i1> [#uses=0]
diff --git a/test/Feature/ppcld.ll b/test/Feature/ppcld.ll
index f21eb437c6d0..393a491847c7 100644
--- a/test/Feature/ppcld.ll
+++ b/test/Feature/ppcld.ll
@@ -15,7 +15,7 @@ entry:
 	%tmp = load float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
 	%tmp2 = load double* @d		; <double> [#uses=1]
-	%tmp3 = mul double %tmp1, %tmp2		; <double> [#uses=1]
+	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to ppc_fp128		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* @ld
 	br label %return
diff --git a/test/Feature/sparcld.ll b/test/Feature/sparcld.ll
index 2e99bdae3262..095f6f6c7c75 100644
--- a/test/Feature/sparcld.ll
+++ b/test/Feature/sparcld.ll
@@ -13,7 +13,7 @@ entry:
 	%tmp = load float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
 	%tmp2 = load double* @d		; <double> [#uses=1]
-	%tmp3 = mul double %tmp1, %tmp2		; <double> [#uses=1]
+	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to fp128		; <fp128> [#uses=1]
 	store fp128 %tmp4, fp128* @ld
 	br label %return
diff --git a/test/Feature/x86ld.ll b/test/Feature/x86ld.ll
index 690400346e34..32005aee305d 100644
--- a/test/Feature/x86ld.ll
+++ b/test/Feature/x86ld.ll
@@ -15,7 +15,7 @@ entry:
 	%tmp = load float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
 	%tmp2 = load double* @d		; <double> [#uses=1]
-	%tmp3 = mul double %tmp1, %tmp2		; <double> [#uses=1]
+	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to x86_fp80		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp4, x86_fp80* @ld
 	br label %return
diff --git a/test/FrontendC/2009-01-20-k8.c b/test/FrontendC/2009-01-20-k8.c
index 627ab65d161d..d28302b4ce80 100644
--- a/test/FrontendC/2009-01-20-k8.c
+++ b/test/FrontendC/2009-01-20-k8.c
@@ -1,3 +1,4 @@
 // RUN: %llvmgcc %s -S -march=k8
-// XTARGET: x86
+// XFAIL: *
+// XTARGET: x86,i386,i686
 long double x;
diff --git a/test/FrontendC/2009-05-04-EnumInreg.c b/test/FrontendC/2009-05-04-EnumInreg.c
index 8a76f5f8e950..6dbdb54db5f6 100644
--- a/test/FrontendC/2009-05-04-EnumInreg.c
+++ b/test/FrontendC/2009-05-04-EnumInreg.c
@@ -1,5 +1,6 @@
 // RUN: %llvmgcc -S -m32 -mregparm=3 %s -emit-llvm -o - | grep {inreg %action}
-// XTARGET: x86
+// XFAIL: *
+// XTARGET: x86,i386,i686
 // PR3967
 
 enum kobject_action {
diff --git a/test/Other/2004-08-16-PackedSelect.ll b/test/Other/2004-08-16-PackedSelect.ll
index 64383169d293..c1d6214dc63d 100644
--- a/test/Other/2004-08-16-PackedSelect.ll
+++ b/test/Other/2004-08-16-PackedSelect.ll
@@ -5,7 +5,7 @@
 
 define void @main() {
         %t0 = load <4 x float>* @foo            ; <<4 x float>> [#uses=3]
-        %t1 = add <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
+        %t1 = fadd <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
         %t2 = select i1 true, <4 x float> %t0, <4 x float> %t1          ; <<4 x float>> [#uses=1]
         store <4 x float> %t2, <4 x float>* @bar
         ret void
diff --git a/test/Other/2004-08-16-PackedSimple.ll b/test/Other/2004-08-16-PackedSimple.ll
index 5bb8b7930009..81cecd4235fc 100644
--- a/test/Other/2004-08-16-PackedSimple.ll
+++ b/test/Other/2004-08-16-PackedSimple.ll
@@ -5,7 +5,7 @@
 
 define void @main() {
         %t0 = load <4 x float>* @foo            ; <<4 x float>> [#uses=3]
-        %t2 = add <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
+        %t2 = fadd <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
         %t3 = select i1 false, <4 x float> %t0, <4 x float> %t2         ; <<4 x float>> [#uses=1]
         store <4 x float> %t3, <4 x float>* @bar
         ret void
diff --git a/test/Other/2004-08-20-PackedControlFlow.ll b/test/Other/2004-08-20-PackedControlFlow.ll
index 49aa606adc04..39435706934f 100644
--- a/test/Other/2004-08-20-PackedControlFlow.ll
+++ b/test/Other/2004-08-20-PackedControlFlow.ll
@@ -12,7 +12,7 @@ C:              ; preds = %B
         ret void
 
 B:              ; preds = %A
-        %t2 = add %v4f %t0, %t0         ; <%v4f> [#uses=1]
+        %t2 = fadd %v4f %t0, %t0         ; <%v4f> [#uses=1]
         br label %C
 
 A:              ; preds = %0
diff --git a/test/Other/2009-06-05-no-implicit-float.ll b/test/Other/2009-06-05-no-implicit-float.ll
new file mode 100644
index 000000000000..5addfe2d99ab
--- /dev/null
+++ b/test/Other/2009-06-05-no-implicit-float.ll
@@ -0,0 +1,4 @@
+
+; RUN: llvm-as < %s | opt -verify | llvm-dis | grep noimplicitfloat
+define void @f() noimplicitfloat {
+}
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 126db4ca2313..c573e565fc83 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -13,11 +13,11 @@ declare i1 @llvm.isunordered.f64(double, double)
 define double @T() {
         %A = call double @cos( double 0.000000e+00 )            ; <double> [#uses=1]
         %B = call double @sin( double 0.000000e+00 )            ; <double> [#uses=1]
-        %a = add double %A, %B          ; <double> [#uses=1]
+        %a = fadd double %A, %B          ; <double> [#uses=1]
         %C = call double @tan( double 0.000000e+00 )            ; <double> [#uses=1]
-        %b = add double %a, %C          ; <double> [#uses=1]
+        %b = fadd double %a, %C          ; <double> [#uses=1]
         %D = call double @sqrt( double 4.000000e+00 )           ; <double> [#uses=1]
-        %c = add double %b, %D          ; <double> [#uses=1]
+        %c = fadd double %b, %D          ; <double> [#uses=1]
         ret double %c
 }
 
diff --git a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
index 50dcf3278b89..3b3f8ada9968 100644
--- a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
+++ b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
@@ -601,7 +601,7 @@ entry:
 	%tmp21362 = icmp eq i32 0, 0		; <i1> [#uses=2]
 	%tmp216 = sitofp i32 %pn_restart.0.ph to float		; <float> [#uses=1]
 	%tmp216.upgrd.177 = fpext float %tmp216 to double		; <double> [#uses=1]
-	%tmp217 = add double %tmp216.upgrd.177, 1.000000e+00		; <double> [#uses=1]
+	%tmp217 = fadd double %tmp216.upgrd.177, 1.000000e+00		; <double> [#uses=1]
 	%tmp835 = icmp sgt i32 %pn_restart.0.ph, 9		; <i1> [#uses=0]
 	store i32 0, i32* @nodes
 	store i32 0, i32* @qnodes
diff --git a/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll b/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
index b4cb5178c4ca..180105afc912 100644
--- a/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
+++ b/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll
@@ -10,8 +10,8 @@ bb.nph:		; preds = %entry
 bb34:		; preds = %bb34, %bb.nph
 	%p.1 = phi float [ 0x3FE6A09E60000000, %bb.nph ], [ %tmp48, %bb34 ]		; <float> [#uses=1]
 	%tmp44 = load float* null		; <float> [#uses=1]
-	%tmp46 = sub float %tmp44, 0.000000e+00		; <float> [#uses=1]
-	%tmp48 = mul float %tmp46, %p.1		; <float> [#uses=1]
+	%tmp46 = fsub float %tmp44, 0.000000e+00		; <float> [#uses=1]
+	%tmp48 = fmul float %tmp46, %p.1		; <float> [#uses=1]
 	br i1 false, label %bb57, label %bb34
 
 bb57:		; preds = %bb34
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index 0a8dd499c656..779e7fbddb1f 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -4,6 +4,6 @@
 define double @foo() nounwind  {
 entry:
 	%tmp1 = volatile load double* @t0.1441, align 8		; <double> [#uses=2]
-	%tmp4 = mul double %tmp1, %tmp1		; <double> [#uses=1]
+	%tmp4 = fmul double %tmp1, %tmp1		; <double> [#uses=1]
 	ret double %tmp4
 }
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index 3464be901844..8a0b5b308f52 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -26,7 +26,7 @@ define double @test2() {
   %V1 = load double* getelementptr (%T* @G, i32 0, i32 0), align 16
   %V2 = load double* getelementptr (%T* @G, i32 0, i32 1), align 8
   %V3 = load double* getelementptr (%T* @G, i32 0, i32 2), align 16
-  %R = add double %V1, %V2
-  %R2 = add double %R, %V3
+  %R = fadd double %V1, %V2
+  %R2 = fadd double %R, %V3
   ret double %R2
 }
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index 6e33ae0653d7..6fa139be2227 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -7,7 +7,7 @@ define internal float @foo() {
 
 define float @bar() {
         %tmp1 = call float (...)* bitcast (float ()* @foo to float (...)*)( )
-        %tmp2 = mul float %tmp1, 1.000000e+01           ; <float> [#uses=1]
+        %tmp2 = fmul float %tmp1, 1.000000e+01           ; <float> [#uses=1]
         ret float %tmp2
 }
 
diff --git a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
index 903e81d3bf96..b2f8258000ff 100644
--- a/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
+++ b/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
@@ -18,7 +18,7 @@ cond_true52:		; preds = %cond_true27
 cond_next182.i:		; preds = %cond_next182.i, %cond_true52
 	%decay.i.0 = phi i32 [ %tmp195.i.upgrd.1, %cond_next182.i ], [ %tmp152.i, %cond_true52 ]		; <i32> [#uses=1]
 	%tmp194.i53 = bitcast i32 %decay.i.0 to float		; <float> [#uses=1]
-	%tmp195.i = sub float %tmp194.i53, 8.000000e+00		; <float> [#uses=1]
+	%tmp195.i = fsub float %tmp194.i53, 8.000000e+00		; <float> [#uses=1]
 	%tmp195.i.upgrd.1 = bitcast float %tmp195.i to i32		; <i32> [#uses=1]
 	br i1 false, label %cond_next182.i, label %bb418.i.preheader
 
diff --git a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
index 6fc065f83f63..be8b36fac6ad 100644
--- a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll
@@ -6,7 +6,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, 1.000000e+00		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, 1.000000e+00		; <double> [#uses=2]
 	%2 = fcmp olt double %1, 1.000000e+04		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
@@ -23,7 +23,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ -10.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, 2.000000e+00		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, 2.000000e+00		; <double> [#uses=2]
 	%2 = fcmp olt double %1, -1.000000e+00		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
@@ -39,7 +39,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, 1.000000e+00		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, 1.000000e+00		; <double> [#uses=2]
 	%2 = fcmp olt double %1, -1.000000e+00		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
@@ -54,7 +54,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ 40.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, -1.000000e+00		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, -1.000000e+00		; <double> [#uses=2]
 	%2 = fcmp olt double %1, 1.000000e+00		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
diff --git a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
index faf1da3058c4..c947d3bc3338 100644
--- a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll
@@ -9,7 +9,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2]
 	%2 = fcmp olt double %1, 2147483646.0e+0		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
@@ -24,7 +24,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2]
 	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0]
-	%1 = add double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2]
+	%1 = fadd double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2]
 	%2 = fcmp olt double %1, 2147483647.0e+0		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %return
 
diff --git a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
index 9fd0eb9e65ae..e611b1fd8341 100644
--- a/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
+++ b/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
@@ -6,6 +6,6 @@ entry:
 
 bb23.i91:		; preds = %bb23.i91, %entry
 	%result.0.i89 = phi ppc_fp128 [ 0xM00000000000000000000000000000000, %entry ], [ %0, %bb23.i91 ]		; <ppc_fp128> [#uses=2]
-	%0 = mul ppc_fp128 %result.0.i89, %result.0.i89		; <ppc_fp128> [#uses=1]
+	%0 = fmul ppc_fp128 %result.0.i89, %result.0.i89		; <ppc_fp128> [#uses=1]
 	br label %bb23.i91
 }
diff --git a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
index 700f294eb885..e70d577e0a9a 100644
--- a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
@@ -9,7 +9,7 @@ entry:
 loop_body:              
         %i = phi float [ %nexti, %loop_body ], [ 0.0, %entry ]          
         tail call void @foo()
-        %nexti = add float %i, 1.0              
+        %nexti = fadd float %i, 1.0
         %less = fcmp olt float %nexti, 2.0              
         br i1 %less, label %loop_body, label %done
 
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 76d48de0d3c5..d7eb7bd47dc1 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -13,16 +13,16 @@ loop:
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double* %d, i64 %indvar.i8
 	%t1 = load double* %t0
-	%t2 = mul double %t1, 0.1
+	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double* %d, i64 %indvar.i24
 	%t4 = load double* %t3
-	%t5 = mul double %t4, 2.3
+	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double* %d, i64 %indvar
 	%t7 = load double* %t6
-	%t8 = mul double %t7, 4.5
+	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
 	%exitcond = icmp eq i64 %indvar.next, 10
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index 5ad0af406d48..c7cf0dd017c7 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -2,8 +2,8 @@
 ; RUN:   grep mul | count 2
 
 define <4 x float> @test(<4 x float> %V) {
-        %Y = mul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
-        %Z = mul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >               ; <<4 x float>> [#uses=1]
+        %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
+        %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >               ; <<4 x float>> [#uses=1]
         ret <4 x float> %Z
 }
 
diff --git a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
index 60ee50327916..eaf10a306f58 100644
--- a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
+++ b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
@@ -3,7 +3,7 @@
 
 define <4 x float> @test(<4 x float> %tmp26, <4 x float> %tmp53) {
         ; (X+Y)-Y != X for fp vectors.
-        %tmp64 = add <4 x float> %tmp26, %tmp53         ; <<4 x float>> [#uses=1]
-        %tmp75 = sub <4 x float> %tmp64, %tmp53         ; <<4 x float>> [#uses=1]
+        %tmp64 = fadd <4 x float> %tmp26, %tmp53         ; <<4 x float>> [#uses=1]
+        %tmp75 = fsub <4 x float> %tmp64, %tmp53         ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp75
 }
diff --git a/test/Transforms/InstCombine/2008-07-16-fsub.ll b/test/Transforms/InstCombine/2008-07-16-fsub.ll
index 1d0554d1810b..ca4174d0a963 100644
--- a/test/Transforms/InstCombine/2008-07-16-fsub.ll
+++ b/test/Transforms/InstCombine/2008-07-16-fsub.ll
@@ -3,6 +3,6 @@
 
 define double @test(double %X) nounwind {
 	; fsub of self can't be optimized away.
-	%Y = sub double %X, %X
+	%Y = fsub double %X, %X
 	ret double %Y
 }
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 35c6567da6d1..298b9a1917af 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -4,6 +4,6 @@ define double @x(i32 %a, i32 %b) nounwind {
   %m = lshr i32 %a, 24
   %n = and i32 %m, %b
   %o = sitofp i32 %n to double
-  %p = add double %o, 1.0
+  %p = fadd double %o, 1.0
   ret double %p
 }
diff --git a/test/Transforms/InstCombine/dce-iterate.ll b/test/Transforms/InstCombine/dce-iterate.ll
index e222970df6d8..faefa8add9ec 100644
--- a/test/Transforms/InstCombine/dce-iterate.ll
+++ b/test/Transforms/InstCombine/dce-iterate.ll
@@ -18,7 +18,7 @@ entry:
         %c = lshr i960 %sz101112.ins, 320               ; <i960> [#uses=1]
         %d = trunc i960 %c to i64               ; <i64> [#uses=1]
         %e = bitcast i64 %d to double           ; <double> [#uses=1]
-        %f = add double %b, %e
+        %f = fadd double %b, %e
 
         ret double %e
 }
diff --git a/test/Transforms/InstCombine/fpextend.ll b/test/Transforms/InstCombine/fpextend.ll
index 5971080cef5c..c212128eb99a 100644
--- a/test/Transforms/InstCombine/fpextend.ll
+++ b/test/Transforms/InstCombine/fpextend.ll
@@ -6,7 +6,7 @@ define void @test() nounwind  {
 entry:
 	%tmp = load float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp3 = add double %tmp1, 0.000000e+00		; <double> [#uses=1]
+	%tmp3 = fadd double %tmp1, 0.000000e+00		; <double> [#uses=1]
 	%tmp34 = fptrunc double %tmp3 to float		; <float> [#uses=1]
 	store float %tmp34, float* @X, align 4
 	ret void
@@ -28,7 +28,7 @@ define void @test4() nounwind  {
 entry:
 	%tmp = load float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = sub double -0.000000e+00, %tmp1		; <double> [#uses=1]
+	%tmp2 = fsub double -0.000000e+00, %tmp1		; <double> [#uses=1]
 	%tmp34 = fptrunc double %tmp2 to float		; <float> [#uses=1]
 	store float %tmp34, float* @X, align 4
 	ret void
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 1a7402543418..9b5f7a5c5efc 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -20,7 +20,7 @@ define i32 @test3(i32 %A) {
 
 define double @test4(double %A) {
         ; This is safe for FP
-        %B = mul double 1.000000e+00, %A                ; <double> [#uses=1]
+        %B = fmul double 1.000000e+00, %A                ; <double> [#uses=1]
         ret double %B
 }
 
@@ -79,15 +79,7 @@ define i32 @test12(i8 %a, i32 %b) {
 ; PR2642
 define internal void @test13(<4 x float>*) {
 	load <4 x float>* %0, align 1
-	mul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
+	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
 	ret void
 }
-
-define internal void @test14(<4 x float>*) {
-	load <4 x float>* %0, align 1
-	mul <4 x float> %2, zeroinitializer
-	store <4 x float> %3, <4 x float>* %0, align 1
-	ret void
-}
-
diff --git a/test/Transforms/InstCombine/multi-use-or.ll b/test/Transforms/InstCombine/multi-use-or.ll
index 85a8b34e2f6e..48049677acd8 100644
--- a/test/Transforms/InstCombine/multi-use-or.ll
+++ b/test/Transforms/InstCombine/multi-use-or.ll
@@ -17,7 +17,7 @@ entry:
         %c = lshr i192 %sy222324.ins, 128               ; <i192> [#uses=1]
         %d = trunc i192 %c to i64               ; <i64> [#uses=1]
         %e = bitcast i64 %d to double           ; <double> [#uses=1]
-        %f = add double %b, %e
+        %f = fadd double %b, %e
 
 ;        ret double %e
         ret double %f
diff --git a/test/Transforms/InstCombine/shufflemask-undef.ll b/test/Transforms/InstCombine/shufflemask-undef.ll
index 24384178efff..a9e8d3495887 100644
--- a/test/Transforms/InstCombine/shufflemask-undef.ll
+++ b/test/Transforms/InstCombine/shufflemask-undef.ll
@@ -75,16 +75,16 @@ bb266.i:
 	shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:3 [#uses=1]
 	shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:4 [#uses=1]
 	shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 >		; <<4 x float>>:5 [#uses=1]
-	mul <4 x float> %5, zeroinitializer		; <<4 x float>>:6 [#uses=2]
-	mul <4 x float> %6, %6		; <<4 x float>>:7 [#uses=1]
-	add <4 x float> zeroinitializer, %7		; <<4 x float>>:8 [#uses=1]
+	fmul <4 x float> %5, zeroinitializer		; <<4 x float>>:6 [#uses=2]
+	fmul <4 x float> %6, %6		; <<4 x float>>:7 [#uses=1]
+	fadd <4 x float> zeroinitializer, %7		; <<4 x float>>:8 [#uses=1]
 	call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone		; <<4 x float>>:9 [#uses=1]
 	%phitmp40 = bitcast <4 x float> %9 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 >		; <<4 x i32>> [#uses=1]
 	%tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 >		; <<4 x i32>> [#uses=1]
 	%tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float>		; <<4 x float>> [#uses=1]
-	add <4 x float> %tmp4117.i, zeroinitializer		; <<4 x float>>:10 [#uses=1]
-	mul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:11 [#uses=1]
+	fadd <4 x float> %tmp4117.i, zeroinitializer		; <<4 x float>>:10 [#uses=1]
+	fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:11 [#uses=1]
 	call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone		; <<4 x float>>:12 [#uses=1]
 	call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> zeroinitializer ) nounwind readnone		; <<4 x float>>:13 [#uses=1]
 	%tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind		; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll
index fdf150f9c61d..86e07ec7ce15 100644
--- a/test/Transforms/InstCombine/signed-comparison.ll
+++ b/test/Transforms/InstCombine/signed-comparison.ll
@@ -14,7 +14,7 @@ bb:
 	%t0 = and i64 %indvar, 65535
 	%t1 = getelementptr double* %p, i64 %t0
 	%t2 = load double* %t1, align 8
-	%t3 = mul double %t2, 2.2
+	%t3 = fmul double %t2, 2.2
 	store double %t3, double* %t1, align 8
 	%i.04 = trunc i64 %indvar to i16
 	%t4 = add i16 %i.04, 1
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index c26c351a741e..2bf7385cddff 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -36,7 +36,7 @@ define i32 @test6(i32 %A) {
 	%C = and i32 %A, 32		; <i32> [#uses=1]
 	%D = sitofp i32 %B to double		; <double> [#uses=1]
 	%E = sitofp i32 %C to double		; <double> [#uses=1]
-	%F = add double %D, %E		; <double> [#uses=1]
+	%F = fadd double %D, %E		; <double> [#uses=1]
 	%G = fptosi double %F to i32		; <i32> [#uses=1]
 	ret i32 %G
 }
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 03e070f89412..95df8c63f6d4 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN:   grep {sub float}
+; RUN:   grep {fadd float}
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
-; RUN:   grep {mul float}
+; RUN:   grep {fmul float}
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
 ; RUN:   not grep {insertelement.*0.00}
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
@@ -26,7 +26,7 @@ entry:
 }
 
 define i32 @test2(float %f) {
-        %tmp5 = mul float %f, %f
+        %tmp5 = fmul float %f, %f
         %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0             
         %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1    
         %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2  
diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll
index 9063148424c5..e444c2a65103 100644
--- a/test/Transforms/InstCombine/vec_narrow.ll
+++ b/test/Transforms/InstCombine/vec_narrow.ll
@@ -5,7 +5,7 @@
 
 define float @test(%V %A, %V %B, float %f) {
         %C = insertelement %V %A, float %f, i32 0               ; <%V> [#uses=1]
-        %D = add %V %C, %B              ; <%V> [#uses=1]
+        %D = fadd %V %C, %B              ; <%V> [#uses=1]
         %E = extractelement %V %D, i32 0                ; <float> [#uses=1]
         ret float %E
 }
diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll
index bae60d970356..adb28e4d5c7f 100644
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -3,13 +3,13 @@
 declare double @abs(double)
 
 define double @test(double %X) {
-  %Y = add double %X, 0.0          ;; Should be a single add x, 0.0
-  %Z = add double %Y, 0.0
+  %Y = fadd double %X, 0.0          ;; Should be a single add x, 0.0
+  %Z = fadd double %Y, 0.0
   ret double %Z
 }
 
 define double @test1(double %X) {
   %Y = call double @abs(double %X)
-  %Z = add double %Y, 0.0
+  %Z = fadd double %Y, 0.0
   ret double %Z
 }
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
index 58bb19dc69c3..e8dc39135295 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
@@ -5,10 +5,10 @@ entry:
 
 bb7:		; preds = %bb7, %entry
 	%tmp39 = load <4 x float>* null		; <<4 x float>> [#uses=1]
-	%tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=1]
-	%tmp43 = add <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >		; <<4 x float>> [#uses=1]
-	%tmp46 = add <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >		; <<4 x float>> [#uses=1]
-	%tmp49 = add <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 >		; <<4 x float>> [#uses=1]
+	%tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=1]
+	%tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >		; <<4 x float>> [#uses=1]
+	%tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >		; <<4 x float>> [#uses=1]
+	%tmp49 = fadd <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 >		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp49, <4 x float>* null
 	br i1 false, label %bb7, label %bb56
 
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
index 79370ee4340b..72cebed5d9f6 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
@@ -9,10 +9,10 @@ bb:             ; preds = %bb56, %entry
 
 bb7:            ; preds = %bb7, %bb
         %tmp39 = load <4 x float>* null         ; <<4 x float>> [#uses=1]
-        %tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >             ; <<4 x float>> [#uses=1]
-        %tmp43 = add <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >             ; <<4 x float>> [#uses=1]
-        %tmp46 = add <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >             ; <<4 x float>> [#uses=1]
-        %tmp49 = add <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 >             ; <<4 x float>> [#uses=1]
+        %tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >             ; <<4 x float>> [#uses=1]
+        %tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >             ; <<4 x float>> [#uses=1]
+        %tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >             ; <<4 x float>> [#uses=1]
+        %tmp49 = fadd <4 x float> %tmp46, < float 0.000000e+00, float 4.000000e+00, float 6.000000e+00, float 1.000000e+00 >             ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp49, <4 x float>* null
         br i1 false, label %bb7, label %bb56
 
diff --git a/test/Transforms/LCSSA/2007-07-12-LICM.ll b/test/Transforms/LCSSA/2007-07-12-LICM.ll
index 1c9830e03d9c..0c433c3ff374 100644
--- a/test/Transforms/LCSSA/2007-07-12-LICM.ll
+++ b/test/Transforms/LCSSA/2007-07-12-LICM.ll
@@ -5,7 +5,7 @@ entry:
 
 bb7:		; preds = %bb7, %entry
 	%tmp39 = load <4 x float>* null		; <<4 x float>> [#uses=1]
-	%tmp40 = add <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=0]
+	%tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	br i1 false, label %bb7, label %bb56
 
diff --git a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
index ec29847e7a82..928fd959ae90 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
@@ -8,7 +8,7 @@ entry:
 
 bb.preheader:		; preds = %entry
 	%tmp3031 = fpext float %contribution to double		; <double> [#uses=1]
-	%tmp32 = mul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
+	%tmp32 = fmul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
 	%tmp3839 = fpext float %sigmal to double		; <double> [#uses=1]
 	br label %bb
 
@@ -22,19 +22,19 @@ bb:		; preds = %bb.preheader, %cond_next45
 cond_true9:		; preds = %bb
 	%tmp12 = getelementptr float* %x, i32 %i.01.0		; <float*> [#uses=1]
 	%tmp13 = load float* %tmp12, align 4		; <float> [#uses=1]
-	%tmp15 = sub float %xcen, %tmp13		; <float> [#uses=1]
+	%tmp15 = fsub float %xcen, %tmp13		; <float> [#uses=1]
 	%tmp16 = tail call float @fabsf( float %tmp15 )		; <float> [#uses=1]
 	%tmp18 = fdiv float %tmp16, %sigmal		; <float> [#uses=1]
 	%tmp21 = load float** %y, align 4		; <float*> [#uses=2]
 	%tmp27 = getelementptr float* %tmp21, i32 %i.01.0		; <float*> [#uses=1]
 	%tmp28 = load float* %tmp27, align 4		; <float> [#uses=1]
 	%tmp2829 = fpext float %tmp28 to double		; <double> [#uses=1]
-	%tmp34 = sub float -0.000000e+00, %tmp18		; <float> [#uses=1]
+	%tmp34 = fsub float -0.000000e+00, %tmp18		; <float> [#uses=1]
 	%tmp3435 = fpext float %tmp34 to double		; <double> [#uses=1]
 	%tmp36 = tail call double @exp( double %tmp3435 )		; <double> [#uses=1]
-	%tmp37 = mul double %tmp32, %tmp36		; <double> [#uses=1]
+	%tmp37 = fmul double %tmp32, %tmp36		; <double> [#uses=1]
 	%tmp40 = fdiv double %tmp37, %tmp3839		; <double> [#uses=1]
-	%tmp41 = add double %tmp2829, %tmp40		; <double> [#uses=1]
+	%tmp41 = fadd double %tmp2829, %tmp40		; <double> [#uses=1]
 	%tmp4142 = fptrunc double %tmp41 to float		; <float> [#uses=1]
 	%tmp44 = getelementptr float* %tmp21, i32 %i.01.0		; <float*> [#uses=1]
 	store float %tmp4142, float* %tmp44, align 4
diff --git a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
index 7d9378512aa4..6619c7d19d8a 100644
--- a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
+++ b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
@@ -9,7 +9,7 @@ entry:
 
 bb.preheader:		; preds = %entry
 	%tmp3031 = fpext float %contribution to double		; <double> [#uses=1]
-	%tmp32 = mul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
+	%tmp32 = fmul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
 	%tmp3839 = fpext float %sigmal to double		; <double> [#uses=1]
 	br label %bb
 
@@ -24,19 +24,19 @@ bb:		; preds = %cond_next45, %bb.preheader
 cond_true9:		; preds = %bb
 	%tmp12 = getelementptr float* %x, i32 %i.01.0		; <float*> [#uses=1]
 	%tmp13 = load float* %tmp12, align 4		; <float> [#uses=1]
-	%tmp15 = sub float %xcen, %tmp13		; <float> [#uses=1]
+	%tmp15 = fsub float %xcen, %tmp13		; <float> [#uses=1]
 	%tmp16 = tail call float @fabsf(float %tmp15)		; <float> [#uses=1]
 	%tmp18 = fdiv float %tmp16, %sigmal		; <float> [#uses=1]
 	%tmp21 = load float** %y, align 4		; <float*> [#uses=2]
 	%tmp27 = getelementptr float* %tmp21, i32 %k.06.0		; <float*> [#uses=1]
 	%tmp28 = load float* %tmp27, align 4		; <float> [#uses=1]
 	%tmp2829 = fpext float %tmp28 to double		; <double> [#uses=1]
-	%tmp34 = sub float -0.000000e+00, %tmp18		; <float> [#uses=1]
+	%tmp34 = fsub float -0.000000e+00, %tmp18		; <float> [#uses=1]
 	%tmp3435 = fpext float %tmp34 to double		; <double> [#uses=1]
 	%tmp36 = tail call double @exp(double %tmp3435)		; <double> [#uses=1]
-	%tmp37 = mul double %tmp32, %tmp36		; <double> [#uses=1]
+	%tmp37 = fmul double %tmp32, %tmp36		; <double> [#uses=1]
 	%tmp40 = fdiv double %tmp37, %tmp3839		; <double> [#uses=1]
-	%tmp41 = add double %tmp2829, %tmp40		; <double> [#uses=1]
+	%tmp41 = fadd double %tmp2829, %tmp40		; <double> [#uses=1]
 	%tmp4142 = fptrunc double %tmp41 to float		; <float> [#uses=1]
 	%tmp44 = getelementptr float* %tmp21, i32 %k.06.0		; <float*> [#uses=1]
 	store float %tmp4142, float* %tmp44, align 4
diff --git a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
index fdc33fb26897..63b8c783c256 100644
--- a/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
+++ b/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
@@ -12,7 +12,7 @@ define double @testfunc(i32 %i, double %j) {
 	%t3 = load i32* %I		; <i32> [#uses=1]
 	%t4 = sitofp i32 %t3 to double		; <double> [#uses=1]
 	%t5 = load double* %J		; <double> [#uses=1]
-	%t6 = mul double %t4, %t5		; <double> [#uses=1]
+	%t6 = fmul double %t4, %t5		; <double> [#uses=1]
 	ret double %t6
 }
 
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index c5cdc29a5cf2..94daee0149ed 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -7,7 +7,7 @@ define void @ccosl({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86
 entry:
 	%tmp2 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
 	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1		; <x86_fp80> [#uses=1]
+	%tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1		; <x86_fp80> [#uses=1]
 	call void @ccoshl( { x86_fp80, x86_fp80 }* sret  %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind 
 	%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8*		; <i8*> [#uses=2]
 	%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index 1ac11aa959f9..ad9fb1b21593 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -9,7 +9,7 @@ entry:
 	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
 	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
 	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
+	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
 	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
 	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
 	%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
index c3600ab84c7c..74434f4d952a 100644
--- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
+++ b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
@@ -477,12 +477,12 @@ invcont3:		; preds = %bb2
 	unreachable
 
 bb4:		; preds = %invcont
-	%3 = mul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=1]
+	%3 = fmul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=1]
 	%4 = fcmp ult x86_fp80 %3, 0xKC0068000000000000000		; <i1> [#uses=1]
 	br i1 %4, label %bb8, label %bb6
 
 bb6:		; preds = %bb4
-	%5 = mul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=1]
+	%5 = fmul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=1]
 	%6 = fcmp ugt x86_fp80 %5, 0xK4005FE00000000000000		; <i1> [#uses=1]
 	br i1 %6, label %bb8, label %bb10
 
@@ -494,16 +494,16 @@ invcont9:		; preds = %bb8
 	unreachable
 
 bb10:		; preds = %bb6
-	%7 = mul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=3]
+	%7 = fmul x86_fp80 %0, 0xK40008000000000000000		; <x86_fp80> [#uses=3]
 	%8 = fcmp ult x86_fp80 %7, 0xK00000000000000000000		; <i1> [#uses=1]
 	br i1 %8, label %bb13, label %bb12
 
 bb12:		; preds = %bb10
-	%9 = add x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF		; <x86_fp80> [#uses=1]
+	%9 = fadd x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF		; <x86_fp80> [#uses=1]
 	br label %bb14
 
 bb13:		; preds = %bb10
-	%10 = sub x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF		; <x86_fp80> [#uses=1]
+	%10 = fsub x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF		; <x86_fp80> [#uses=1]
 	br label %bb14
 
 bb14:		; preds = %bb13, %bb12
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index 05d6103bbfd3..3662e097c9f0 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -1,8 +1,8 @@
 ; RUN: llvm-as < %s | opt -reassociate -disable-output
 
 define void @foo() {
-	%tmp162 = sub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>> [#uses=1]
-	%tmp164 = mul <4 x float> zeroinitializer, %tmp162		; <<4 x float>> [#uses=0]
+	%tmp162 = fsub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>> [#uses=1]
+	%tmp164 = fmul <4 x float> zeroinitializer, %tmp162		; <<4 x float>> [#uses=0]
 	ret void
 }
 
diff --git a/test/Transforms/SCCP/2006-12-04-PackedType.ll b/test/Transforms/SCCP/2006-12-04-PackedType.ll
index b7a7880b0faf..0e268c2db839 100644
--- a/test/Transforms/SCCP/2006-12-04-PackedType.ll
+++ b/test/Transforms/SCCP/2006-12-04-PackedType.ll
@@ -112,7 +112,7 @@ cond_true93:		; preds = %entry
 	%tmp.upgrd.1 = getelementptr %struct.GLDContextRec* %ctx, i32 0, i32 31, i32 14		; <i32*> [#uses=1]
 	%tmp95 = load i32* %tmp.upgrd.1		; <i32> [#uses=1]
 	%tmp95.upgrd.2 = sitofp i32 %tmp95 to float		; <float> [#uses=1]
-	%tmp108 = mul float undef, %tmp95.upgrd.2		; <float> [#uses=1]
+	%tmp108 = fmul float undef, %tmp95.upgrd.2		; <float> [#uses=1]
 	br label %cond_next116
 cond_next116:		; preds = %cond_true93, %entry
 	%point_size.2 = phi float [ %tmp108, %cond_true93 ], [ undef, %entry ]		; <float> [#uses=2]
@@ -130,7 +130,7 @@ cond_true462:		; preds = %cond_true458
 cond_true467:		; preds = %cond_true462
 	ret void
 cond_next484:		; preds = %cond_next116
-	%tmp486 = mul float %point_size.2, 5.000000e-01		; <float> [#uses=1]
+	%tmp486 = fmul float %point_size.2, 5.000000e-01		; <float> [#uses=1]
 	br label %cond_next487
 cond_next487:		; preds = %cond_next484, %cond_true462, %cond_true458
 	%radius.0 = phi float [ %tmp486, %cond_next484 ], [ 5.000000e-01, %cond_true458 ], [ 5.000000e-01, %cond_true462 ]		; <float> [#uses=2]
diff --git a/test/Transforms/SCCP/apint-ipsccp4.ll b/test/Transforms/SCCP/apint-ipsccp4.ll
index de355d1d50e1..a0656b75c132 100644
--- a/test/Transforms/SCCP/apint-ipsccp4.ll
+++ b/test/Transforms/SCCP/apint-ipsccp4.ll
@@ -35,10 +35,10 @@ define float @All()
   %B = fcmp oge float %A, 1.0
   br i1 %B, label %T, label %F
 T:
-  %C = add float %A, 1.0
+  %C = fadd float %A, 1.0
   br label %exit
 F:
-  %D = add float %A, 2.0
+  %D = fadd float %A, 2.0
   br label %exit
 exit:
   %E = phi float [%C, %T], [%D, %F]
diff --git a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
index 13055ea87b60..facb7c13c0ad 100644
--- a/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
+++ b/test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll
@@ -1766,7 +1766,7 @@ _ZL13random_doublev.exit:		; preds = %bb.i, %bb7
 	call void @llvm.dbg.stoppoint(i32 75, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
 	%22 = load i32* @_ZZL13random_doublevE4seed, align 4		; <i32> [#uses=2]
 	%23 = sitofp i32 %22 to double		; <double> [#uses=1]
-	%24 = mul double %23, 0x3E340000002813D9		; <double> [#uses=1]
+	%24 = fmul double %23, 0x3E340000002813D9		; <double> [#uses=1]
 	call void @llvm.dbg.stoppoint(i32 76, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
 	%25 = xor i32 %22, 123459876		; <i32> [#uses=1]
 	store i32 %25, i32* @_ZZL13random_doublevE4seed, align 4
@@ -1803,7 +1803,7 @@ bb8:		; preds = %bb.i1, %_ZL13random_doublev.exit
 	call void @llvm.dbg.stoppoint(i32 75, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
 	%38 = load i32* @_ZZL13random_doublevE4seed, align 4		; <i32> [#uses=2]
 	%39 = sitofp i32 %38 to double		; <double> [#uses=1]
-	%40 = mul double %39, 0x3E340000002813D9		; <double> [#uses=1]
+	%40 = fmul double %39, 0x3E340000002813D9		; <double> [#uses=1]
 	call void @llvm.dbg.stoppoint(i32 76, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
 	%41 = xor i32 %38, 123459876		; <i32> [#uses=1]
 	store i32 %41, i32* @_ZZL13random_doublevE4seed, align 4
@@ -2110,16 +2110,16 @@ entry:
 	%real7 = load double* %real6, align 8		; <double> [#uses=4]
 	%imag8 = getelementptr %1* %memtmp1, i32 0, i32 1		; <double*> [#uses=1]
 	%imag9 = load double* %imag8, align 8		; <double> [#uses=4]
-	%21 = mul double %real3, %real7		; <double> [#uses=1]
-	%22 = mul double %imag5, %imag9		; <double> [#uses=1]
-	%23 = add double %21, %22		; <double> [#uses=1]
-	%24 = mul double %real7, %real7		; <double> [#uses=1]
-	%25 = mul double %imag9, %imag9		; <double> [#uses=1]
-	%26 = add double %24, %25		; <double> [#uses=2]
+	%21 = fmul double %real3, %real7		; <double> [#uses=1]
+	%22 = fmul double %imag5, %imag9		; <double> [#uses=1]
+	%23 = fadd double %21, %22		; <double> [#uses=1]
+	%24 = fmul double %real7, %real7		; <double> [#uses=1]
+	%25 = fmul double %imag9, %imag9		; <double> [#uses=1]
+	%26 = fadd double %24, %25		; <double> [#uses=2]
 	%27 = fdiv double %23, %26		; <double> [#uses=1]
-	%28 = mul double %imag5, %real7		; <double> [#uses=1]
-	%29 = mul double %real3, %imag9		; <double> [#uses=1]
-	%30 = sub double %28, %29		; <double> [#uses=1]
+	%28 = fmul double %imag5, %real7		; <double> [#uses=1]
+	%29 = fmul double %real3, %imag9		; <double> [#uses=1]
+	%30 = fsub double %28, %29		; <double> [#uses=1]
 	%31 = fdiv double %30, %26		; <double> [#uses=1]
 	%real10 = getelementptr %1* %0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %27, double* %real10, align 8
@@ -2227,12 +2227,12 @@ entry:
 	%real9 = load double* %real8, align 8		; <double> [#uses=2]
 	%imag10 = getelementptr %1* %memtmp3, i32 0, i32 1		; <double*> [#uses=1]
 	%imag11 = load double* %imag10, align 8		; <double> [#uses=2]
-	%27 = mul double %real5, %real9		; <double> [#uses=1]
-	%28 = mul double %imag7, %imag11		; <double> [#uses=1]
-	%29 = sub double %27, %28		; <double> [#uses=1]
-	%30 = mul double %real5, %imag11		; <double> [#uses=1]
-	%31 = mul double %real9, %imag7		; <double> [#uses=1]
-	%32 = add double %30, %31		; <double> [#uses=1]
+	%27 = fmul double %real5, %real9		; <double> [#uses=1]
+	%28 = fmul double %imag7, %imag11		; <double> [#uses=1]
+	%29 = fsub double %27, %28		; <double> [#uses=1]
+	%30 = fmul double %real5, %imag11		; <double> [#uses=1]
+	%31 = fmul double %real9, %imag7		; <double> [#uses=1]
+	%32 = fadd double %30, %31		; <double> [#uses=1]
 	%real12 = getelementptr %1* %0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %29, double* %real12, align 8
 	%imag13 = getelementptr %1* %0, i32 0, i32 1		; <double*> [#uses=1]
@@ -2384,10 +2384,10 @@ entry:
 	call void @llvm.dbg.stoppoint(i32 444, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
 	%0 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__x) nounwind		; <double*> [#uses=1]
 	%1 = load double* %0, align 8		; <double> [#uses=1]
-	%2 = sub double -0.000000e+00, %1		; <double> [#uses=1]
+	%2 = fsub double -0.000000e+00, %1		; <double> [#uses=1]
 	%3 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__x) nounwind		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
-	%5 = sub double -0.000000e+00, %4		; <double> [#uses=1]
+	%5 = fsub double -0.000000e+00, %4		; <double> [#uses=1]
 	call void @_ZNSt7complexIdEC1Edd(%"struct.std::complex<double>"* %agg.result, double %5, double %2) nounwind
 	call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram576 to %0*))
 	ret void
@@ -2497,16 +2497,16 @@ entry:
 	%real9 = load double* %real8, align 8		; <double> [#uses=4]
 	%imag10 = getelementptr %1* %memtmp3, i32 0, i32 1		; <double*> [#uses=1]
 	%imag11 = load double* %imag10, align 8		; <double> [#uses=4]
-	%27 = mul double %real5, %real9		; <double> [#uses=1]
-	%28 = mul double %imag7, %imag11		; <double> [#uses=1]
-	%29 = add double %27, %28		; <double> [#uses=1]
-	%30 = mul double %real9, %real9		; <double> [#uses=1]
-	%31 = mul double %imag11, %imag11		; <double> [#uses=1]
-	%32 = add double %30, %31		; <double> [#uses=2]
+	%27 = fmul double %real5, %real9		; <double> [#uses=1]
+	%28 = fmul double %imag7, %imag11		; <double> [#uses=1]
+	%29 = fadd double %27, %28		; <double> [#uses=1]
+	%30 = fmul double %real9, %real9		; <double> [#uses=1]
+	%31 = fmul double %imag11, %imag11		; <double> [#uses=1]
+	%32 = fadd double %30, %31		; <double> [#uses=2]
 	%33 = fdiv double %29, %32		; <double> [#uses=1]
-	%34 = mul double %imag7, %real9		; <double> [#uses=1]
-	%35 = mul double %real5, %imag11		; <double> [#uses=1]
-	%36 = sub double %34, %35		; <double> [#uses=1]
+	%34 = fmul double %imag7, %real9		; <double> [#uses=1]
+	%35 = fmul double %real5, %imag11		; <double> [#uses=1]
+	%36 = fsub double %34, %35		; <double> [#uses=1]
 	%37 = fdiv double %36, %32		; <double> [#uses=1]
 	%real12 = getelementptr %1* %0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %33, double* %real12, align 8
@@ -2554,7 +2554,7 @@ entry:
 	%1 = load double* %0, align 4		; <double> [#uses=1]
 	%2 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__z) nounwind		; <double*> [#uses=1]
 	%3 = load double* %2, align 8		; <double> [#uses=1]
-	%4 = add double %1, %3		; <double> [#uses=1]
+	%4 = fadd double %1, %3		; <double> [#uses=1]
 	%5 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %4, double* %5, align 4
 	call void @llvm.dbg.stoppoint(i32 1271, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2562,7 +2562,7 @@ entry:
 	%7 = load double* %6, align 4		; <double> [#uses=1]
 	%8 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__z) nounwind		; <double*> [#uses=1]
 	%9 = load double* %8, align 8		; <double> [#uses=1]
-	%10 = add double %7, %9		; <double> [#uses=1]
+	%10 = fadd double %7, %9		; <double> [#uses=1]
 	%11 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 1		; <double*> [#uses=1]
 	store double %10, double* %11, align 4
 	call void @llvm.dbg.stoppoint(i32 1272, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2599,7 +2599,7 @@ entry:
 	%1 = load double* %0, align 4		; <double> [#uses=1]
 	%2 = call double* @_ZNKSt7complexIdE4realEv(%"struct.std::complex<double>"* %__z) nounwind		; <double*> [#uses=1]
 	%3 = load double* %2, align 8		; <double> [#uses=1]
-	%4 = sub double %1, %3		; <double> [#uses=1]
+	%4 = fsub double %1, %3		; <double> [#uses=1]
 	%5 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %4, double* %5, align 4
 	call void @llvm.dbg.stoppoint(i32 1280, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
@@ -2607,7 +2607,7 @@ entry:
 	%7 = load double* %6, align 4		; <double> [#uses=1]
 	%8 = call double* @_ZNKSt7complexIdE4imagEv(%"struct.std::complex<double>"* %__z) nounwind		; <double*> [#uses=1]
 	%9 = load double* %8, align 8		; <double> [#uses=1]
-	%10 = sub double %7, %9		; <double> [#uses=1]
+	%10 = fsub double %7, %9		; <double> [#uses=1]
 	%11 = getelementptr %"struct.std::complex<double>"* %this, i32 0, i32 0, i32 1		; <double*> [#uses=1]
 	store double %10, double* %11, align 4
 	call void @llvm.dbg.stoppoint(i32 1281, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit5 to %0*))
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index 4ab17ae2bbb9..a1ad3f9b8280 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -25,7 +25,7 @@ define float @test2(i128 %V) nounwind {
 	%B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
 	%a = load float* %A
 	%b = load float* %B
-	%c = add float %a, %b
+	%c = fadd float %a, %b
 	ret float %c
 }
 
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index ee77e1fdeb7e..e62ccc295451 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -10,23 +10,23 @@ entry:
 	%tmp5 = and i32 %tmp3, 124		; <i32> [#uses=4]
 	%tmp753 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp5		; <float*> [#uses=1]
 	%tmp9 = load float* %tmp753		; <float> [#uses=1]
-	%tmp11 = mul float %tmp9, %x		; <float> [#uses=1]
-	%tmp13 = add float %tmp11, 0.000000e+00		; <float> [#uses=1]
+	%tmp11 = fmul float %tmp9, %x		; <float> [#uses=1]
+	%tmp13 = fadd float %tmp11, 0.000000e+00		; <float> [#uses=1]
 	%tmp17.sum52 = or i32 %tmp5, 1		; <i32> [#uses=1]
 	%tmp1851 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp17.sum52		; <float*> [#uses=1]
 	%tmp19 = load float* %tmp1851		; <float> [#uses=1]
-	%tmp21 = mul float %tmp19, %y		; <float> [#uses=1]
-	%tmp23 = add float %tmp21, %tmp13		; <float> [#uses=1]
+	%tmp21 = fmul float %tmp19, %y		; <float> [#uses=1]
+	%tmp23 = fadd float %tmp21, %tmp13		; <float> [#uses=1]
 	%tmp27.sum50 = or i32 %tmp5, 2		; <i32> [#uses=1]
 	%tmp2849 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp27.sum50		; <float*> [#uses=1]
 	%tmp29 = load float* %tmp2849		; <float> [#uses=1]
-	%tmp31 = mul float %tmp29, %z		; <float> [#uses=1]
-	%tmp33 = add float %tmp31, %tmp23		; <float> [#uses=1]
+	%tmp31 = fmul float %tmp29, %z		; <float> [#uses=1]
+	%tmp33 = fadd float %tmp31, %tmp23		; <float> [#uses=1]
 	%tmp37.sum48 = or i32 %tmp5, 3		; <i32> [#uses=1]
 	%tmp3847 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp37.sum48		; <float*> [#uses=1]
 	%tmp39 = load float* %tmp3847		; <float> [#uses=1]
-	%tmp41 = mul float %tmp39, %w		; <float> [#uses=1]
-	%tmp43 = add float %tmp41, %tmp33		; <float> [#uses=1]
+	%tmp41 = fmul float %tmp39, %w		; <float> [#uses=1]
+	%tmp43 = fadd float %tmp41, %tmp33		; <float> [#uses=1]
 	ret float %tmp43
 }
 
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index a0d331719f06..4b6555b3d673 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -5,12 +5,12 @@ define void @test(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%G.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
 	store float %f, float* %G.upgrd.1
 	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
-	%tmp6 = add <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
+	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 }
@@ -19,12 +19,12 @@ define void @test2(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	store float %f, float* %tmp.upgrd.2
 	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
-	%tmp6 = add <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
+	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 }
@@ -33,7 +33,7 @@ define void @test3(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.3 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	%tmp.upgrd.4 = load float* %tmp.upgrd.3		; <float> [#uses=1]
@@ -45,7 +45,7 @@ define void @test4(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = add <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%G.upgrd.5 = getelementptr <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp.upgrd.6 = load float* %G.upgrd.5		; <float> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
index f22ca6ca330a..6bfef0214931 100644
--- a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll
@@ -142,11 +142,11 @@ invcont57:		; preds = %invcont51
 	store double %tmp64, double* %tmp62
 	%tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
 	%tmp67 = load double* %tmp65		; <double> [#uses=1]
-	%tmp69 = mul double %tmp67, %tmp48		; <double> [#uses=1]
+	%tmp69 = fmul double %tmp67, %tmp48		; <double> [#uses=1]
 	store double %tmp69, double* %tmp65
 	%tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize )		; <double*> [#uses=2]
 	%tmp73 = load double* %tmp71		; <double> [#uses=1]
-	%tmp75 = mul double %tmp73, %tmp54		; <double> [#uses=1]
+	%tmp75 = fmul double %tmp73, %tmp54		; <double> [#uses=1]
 	store double %tmp75, double* %tmp71
 	%tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0		; <%struct.QPaintDevice*> [#uses=1]
 	%tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 )
@@ -188,7 +188,7 @@ invcont104:		; preds = %invcont103
 			to label %invcont106 unwind label %cleanup329		; <i32> [#uses=1]
 invcont106:		; preds = %invcont104
 	%tmp108 = sitofp i32 %tmp107 to double		; <double> [#uses=1]
-	%tmp109 = mul double %tmp108, 0x3FE93264C993264C		; <double> [#uses=1]
+	%tmp109 = fmul double %tmp108, 0x3FE93264C993264C		; <double> [#uses=1]
 	%tmp109.upgrd.17 = fptosi double %tmp109 to i32		; <i32> [#uses=3]
 	%tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 )		; <%struct.QTextBlockGroup*> [#uses=1]
 	invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret  %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 )
@@ -235,7 +235,7 @@ invcont124:		; preds = %invcont122
 	store double %tmp137, double* %tmp135
 	%tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body )		; <double> [#uses=1]
 	%tmp139 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp140 = sub double %tmp138, %tmp139		; <double> [#uses=1]
+	%tmp140 = fsub double %tmp138, %tmp139		; <double> [#uses=1]
 	%tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
 			to label %invcont141 unwind label %cleanup192		; <%struct.QPaintDevice*> [#uses=1]
 invcont141:		; preds = %invcont124
@@ -249,7 +249,7 @@ invcont146:		; preds = %invcont144
 			to label %invcont148 unwind label %cleanup168		; <i32> [#uses=1]
 invcont148:		; preds = %invcont146
 	%tmp149.upgrd.21 = sitofp i32 %tmp149 to double		; <double> [#uses=1]
-	%tmp150 = add double %tmp140, %tmp149.upgrd.21		; <double> [#uses=1]
+	%tmp150 = fadd double %tmp140, %tmp149.upgrd.21		; <double> [#uses=1]
 	%tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
 			to label %invcont151 unwind label %cleanup168		; <%struct.QPaintDevice*> [#uses=1]
 invcont151:		; preds = %invcont148
@@ -259,10 +259,10 @@ invcont153:		; preds = %invcont151
 	%tmp155 = mul i32 %tmp154, 5		; <i32> [#uses=1]
 	%tmp156 = sdiv i32 %tmp155, 72		; <i32> [#uses=1]
 	%tmp156.upgrd.22 = sitofp i32 %tmp156 to double		; <double> [#uses=1]
-	%tmp157 = add double %tmp150, %tmp156.upgrd.22		; <double> [#uses=1]
+	%tmp157 = fadd double %tmp150, %tmp156.upgrd.22		; <double> [#uses=1]
 	%tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body )		; <double> [#uses=1]
 	%tmp159 = sitofp i32 %tmp109.upgrd.17 to double		; <double> [#uses=1]
-	%tmp160 = sub double %tmp158, %tmp159		; <double> [#uses=1]
+	%tmp160 = fsub double %tmp158, %tmp159		; <double> [#uses=1]
 	call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 )
 	%tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0		; <double*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 43ff690150e3..4c9c9e8ae685 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -15,7 +15,7 @@ entry:
 	br i1 %toBool, label %cond_true, label %cond_next
 
 cond_true:		; preds = %entry
-	%tmp7 = add double %tmp, %Z		; <double> [#uses=1]
+	%tmp7 = fadd double %tmp, %Z		; <double> [#uses=1]
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %entry
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
index a370b9521090..be3410c15a38 100644
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
@@ -21,7 +21,7 @@ bb56:		; preds = %bb48
 
 
 bb174:		; preds = %bb144, %bb114
-	%tmp191 = mul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000		; <x86_fp80> [#uses=1]
+	%tmp191 = fmul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000		; <x86_fp80> [#uses=1]
 	br label %bb196
 
 bb196:		; preds = %bb174, %bb56, %bb40
diff --git a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
index 5969f27cdfcd..dc0cbbebedc2 100644
--- a/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
+++ b/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
@@ -29,7 +29,7 @@ bb3:		; preds = %bb2, %bb1
 	store i32 %storemerge, i32* @j
 	%1 = sitofp i32 %storemerge to double		; <double> [#uses=1]
 	%2 = call double @sin(double %1) nounwind readonly		; <double> [#uses=1]
-	%3 = add double %2, %d.0		; <double> [#uses=1]
+	%3 = fadd double %2, %d.0		; <double> [#uses=1]
 	%4 = add i32 %l.0, 1		; <i32> [#uses=1]
 	br label %bb4
 
diff --git a/test/Transforms/SimplifyLibCalls/half-powr.ll b/test/Transforms/SimplifyLibCalls/half-powr.ll
index f4e898c0b236..890e788d8c2e 100644
--- a/test/Transforms/SimplifyLibCalls/half-powr.ll
+++ b/test/Transforms/SimplifyLibCalls/half-powr.ll
@@ -11,7 +11,7 @@ bb:		; preds = %entry
 
 bb1:		; preds = %bb, %entry
 	%f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ]		; <float> [#uses=1]
-	%2 = mul float %f_addr.0, %g		; <float> [#uses=1]
+	%2 = fmul float %f_addr.0, %g		; <float> [#uses=1]
 	ret float %2
 }
 
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 113d98743c47..5c1ee351a2a0 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -2,9 +2,9 @@
 # large and three small executables. This is done to minimize memory load
 # in parallel builds.  Please retain this ordering.
 
-if( NOT MSVC )
-  add_subdirectory(llvm-config)
-endif( NOT MSVC )
+if (NOT USE_EXPLICIT_DEPENDENCIES)
+ add_subdirectory(llvm-config)
+endif()
 
 add_subdirectory(opt)
 add_subdirectory(llvm-as)
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 4808f0e529cc..c630331d684c 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -100,6 +100,16 @@ cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
                        cl::desc("Do not verify input module"));
 
 
+static cl::opt<bool>
+DisableRedZone("disable-red-zone",
+  cl::desc("Do not emit code that uses the red zone."),
+  cl::init(false));
+
+static cl::opt<bool>
+NoImplicitFloats("no-implicit-float",
+  cl::desc("Don't generate implicit floating point instructions (x86-only)"),
+  cl::init(false));
+
 // GetFileNameRoot - Helper function to get the basename of a filename.
 static inline std::string
 GetFileNameRoot(const std::string &InputFilename) {
@@ -336,8 +346,13 @@ int main(int argc, char **argv) {
     // Run our queue of passes all at once now, efficiently.
     // TODO: this could lazily stream functions out of the module.
     for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I)
-      if (!I->isDeclaration())
+      if (!I->isDeclaration()) {
+        if (DisableRedZone)
+          I->addFnAttr(Attribute::NoRedZone);
+        if (NoImplicitFloats)
+          I->addFnAttr(Attribute::NoImplicitFloat);
         Passes.run(*I);
+      }
 
     Passes.doFinalization();
   }
diff --git a/tools/llvm-ld/Optimize.cpp b/tools/llvm-ld/Optimize.cpp
index a4ca95199a1c..e4668958dbbe 100644
--- a/tools/llvm-ld/Optimize.cpp
+++ b/tools/llvm-ld/Optimize.cpp
@@ -94,7 +94,7 @@ void Optimize(Module* M) {
 
   if (!DisableOptimizations)
     createStandardLTOPasses(&Passes, !DisableInternalize, !DisableInline,
-                            /*RunSecondGlobalOpt=*/true, VerifyEach);
+                            VerifyEach);
 
   // If the -s or -S command line options were specified, strip the symbols out
   // of the resulting program to make it smaller.  -s and -S are GNU ld options
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 03a11b687daa..0bd2abe24558 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -72,7 +72,7 @@ LTOCodeGenerator::LTOCodeGenerator()
     : _linker("LinkTimeOptimizer", "ld-temp.o"), _target(NULL),
       _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
       _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
-      _nativeObjectFile(NULL), _gccPath(NULL)
+      _nativeObjectFile(NULL), _gccPath(NULL), _assemblerPath(NULL)
 {
 
 }
@@ -128,6 +128,13 @@ void LTOCodeGenerator::setGccPath(const char* path)
     _gccPath = new sys::Path(path);
 }
 
+void LTOCodeGenerator::setAssemblerPath(const char* path)
+{
+    if ( _assemblerPath )
+        delete _assemblerPath;
+    _assemblerPath = new sys::Path(path);
+}
+
 void LTOCodeGenerator::addMustPreserveSymbol(const char* sym)
 {
     _mustPreserveSymbols[sym] = 1;
@@ -220,13 +227,18 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
 bool LTOCodeGenerator::assemble(const std::string& asmPath, 
                                 const std::string& objPath, std::string& errMsg)
 {
-    sys::Path gcc;
-    if ( _gccPath ) {
-        gcc = *_gccPath;
+    sys::Path tool;
+    bool needsCompilerOptions = true;
+    if ( _assemblerPath ) {
+        tool = *_assemblerPath;
+        needsCompilerOptions = false;
+    }
+    else if ( _gccPath ) {
+        tool = *_gccPath;
     } else {
         // find compiler driver
-        gcc = sys::Program::FindProgramByName("gcc");
-        if ( gcc.isEmpty() ) {
+        tool = sys::Program::FindProgramByName("gcc");
+        if ( tool.isEmpty() ) {
             errMsg = "can't locate gcc";
             return true;
         }
@@ -235,8 +247,9 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
     // build argument list
     std::vector<const char*> args;
     std::string targetTriple = _linker.getModule()->getTargetTriple();
-    args.push_back(gcc.c_str());
+    args.push_back(tool.c_str());
     if ( targetTriple.find("darwin") != targetTriple.size() ) {
+        // darwin specific command line options
         if (strncmp(targetTriple.c_str(), "i386-apple-", 11) == 0) {
             args.push_back("-arch");
             args.push_back("i386");
@@ -274,17 +287,22 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
             args.push_back("-arch");
             args.push_back("armv6");
         }
+        // add -static to assembler command line when code model requires
+        if ( (_assemblerPath != NULL) && (_codeModel == LTO_CODEGEN_PIC_MODEL_STATIC) )
+            args.push_back("-static");
+    }
+    if ( needsCompilerOptions ) {
+        args.push_back("-c");
+        args.push_back("-x");
+        args.push_back("assembler");
     }
-    args.push_back("-c");
-    args.push_back("-x");
-    args.push_back("assembler");
     args.push_back("-o");
     args.push_back(objPath.c_str());
     args.push_back(asmPath.c_str());
     args.push_back(0);
 
     // invoke assembler
-    if ( sys::Program::ExecuteAndWait(gcc, &args[0], 0, 0, 0, 0, &errMsg) ) {
+    if ( sys::Program::ExecuteAndWait(tool, &args[0], 0, 0, 0, 0, &errMsg) ) {
         errMsg = "error in assembly";    
         return true;
     }
@@ -304,6 +322,20 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
         if ( march == NULL )
             return true;
 
+        // The relocation model is actually a static member of TargetMachine
+        // and needs to be set before the TargetMachine is instantiated.
+        switch( _codeModel ) {
+        case LTO_CODEGEN_PIC_MODEL_STATIC:
+            TargetMachine::setRelocationModel(Reloc::Static);
+            break;
+        case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+            TargetMachine::setRelocationModel(Reloc::PIC_);
+            break;
+        case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+            TargetMachine::setRelocationModel(Reloc::DynamicNoPIC);
+            break;
+        }
+
         // construct LTModule, hand over ownership of module and target
         std::string FeatureStr =
           getFeatureString(_linker.getModule()->getTargetTriple().c_str());
@@ -363,19 +395,6 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
     if ( _target->getTargetAsmInfo()->doesSupportExceptionHandling() )
         llvm::ExceptionHandling = true;
 
-    // set codegen model
-    switch( _codeModel ) {
-        case LTO_CODEGEN_PIC_MODEL_STATIC:
-            _target->setRelocationModel(Reloc::Static);
-            break;
-        case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
-            _target->setRelocationModel(Reloc::PIC_);
-            break;
-        case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
-            _target->setRelocationModel(Reloc::DynamicNoPIC);
-            break;
-    }
-
     // if options were requested, set them
     if ( !_codegenOptions.empty() )
         cl::ParseCommandLineOptions(_codegenOptions.size(), 
@@ -391,7 +410,6 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
     passes.add(new TargetData(*_target->getTargetData()));
     
     createStandardLTOPasses(&passes, /*Internalize=*/ false, !DisableInline,
-                            /*RunSecondGlobalOpt=*/ false, 
                             /*VerifyEach=*/ false);
 
     // Make sure everything is still good.
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 57398b065003..e02a7ab1159f 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -37,6 +37,7 @@ public:
     bool                setDebugInfo(lto_debug_model, std::string& errMsg);
     bool                setCodePICModel(lto_codegen_model, std::string& errMsg);
     void                setGccPath(const char* path);
+    void                setAssemblerPath(const char* path);
     void                addMustPreserveSymbol(const char* sym);
     bool                writeMergedModules(const char* path, 
                                                            std::string& errMsg);
@@ -61,6 +62,7 @@ private:
     llvm::MemoryBuffer*         _nativeObjectFile;
     std::vector<const char*>    _codegenOptions;
     llvm::sys::Path*            _gccPath;
+    llvm::sys::Path*            _assemblerPath;
 };
 
 #endif // LTO_CODE_GENERATOR_H
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index 5c3f90aa485d..7eb39ef21000 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -210,6 +210,14 @@ void lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path)
 }
 
 //
+// sets the path to the assembler tool
+//
+void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
+{
+    cg->setAssemblerPath(path);
+}
+
+//
 // adds to a list of all global symbols that must exist in the final
 // generated code.  If a function is not listed there, it might be
 // inlined into every usage and optimized away.
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index aff755937b5b..01f43d1c36da 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -20,4 +20,5 @@ _lto_codegen_set_debug_model
 _lto_codegen_set_pic_model
 _lto_codegen_write_merged_modules
 _lto_codegen_debug_options
+_lto_codegen_set_assembler_path
 
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 05083bfede6a..2ca849d67976 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -53,7 +53,8 @@ FPVAL               ::= ESAPINTVAL ^ "." ^ EUAPINTVAL | "0x" ^ HexDigitSeq ;
 The rest of this file is derived directly from llvmAsmParser.y.
 *)
 
-ArithmeticOps ::= add | sub | mul | udiv | sdiv | fdiv | urem | srem | frem ;
+ArithmeticOps ::= add | fadd | sub | fsub | mul | fmul |
+                  udiv | sdiv | fdiv | urem | srem | frem ;
 LogicalOps    ::= shl | lshr | ashr | and | or | xor;
 CastOps       ::= trunc | zext | sext | fptrunc | fpext | bitcast |
                   uitofp | sitofp | fptoui | fptosi | inttoptr | ptrtoint ;
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 201d8ddddbeb..b4104f9dfe61 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -22,7 +22,8 @@ syn match   llvmType /\<i\d\+\>/
 " Instructions.
 " The true and false tokens can be used for comparison opcodes, but it's
 " much more common for these tokens to be used for boolean constants.
-syn keyword llvmStatement add sub mul sdiv udiv fdiv srem urem frem
+syn keyword llvmStatement add fadd sub fsub mul fmul
+syn keyword llvmStatement sdiv udiv fdiv srem urem frem
 syn keyword llvmStatement and or xor
 syn keyword llvmStatement icmp fcmp
 syn keyword llvmStatement eq ne ugt uge ult ule sgt sge slt sle
author	Ed Schouten <ed@FreeBSD.org>	2009-06-06 08:20:29 +0000
committer	Ed Schouten <ed@FreeBSD.org>	2009-06-06 08:20:29 +0000
commit	f4fe016fa15f703fe9c1b932d1e81e2c718521db (patch)
tree	8a1bbd1a5b838080d31e5c93a1817006b8c62318
parent	68eb509bdc5c7007520d5231cd92de28106236df (diff)