166 files changed, 3230 insertions, 4352 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 216e0f6c651d..96e06082fde6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,7 @@ set(LLVM_ALL_TARGETS
   CellSPU
   CppBackend
   Mips
+  MBlaze
   MSIL
   MSP430
   PIC16
diff --git a/Makefile.config.in b/Makefile.config.in
index aca21e5b7d11..1b61f0908a8c 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -76,14 +76,14 @@ endif
 
 LLVMMAKE := $(LLVM_SRC_ROOT)/make
 
-PROJ_bindir     := $(DESTDIR)$(PROJ_prefix)/bin
-PROJ_libdir     := $(DESTDIR)$(PROJ_prefix)/lib
-PROJ_datadir    := $(DESTDIR)$(PROJ_prefix)/share
-PROJ_docsdir    := $(DESTDIR)$(PROJ_prefix)/docs/llvm
-PROJ_etcdir     := $(DESTDIR)$(PROJ_prefix)/etc/llvm
-PROJ_includedir := $(DESTDIR)$(PROJ_prefix)/include
-PROJ_infodir    := $(DESTDIR)$(PROJ_prefix)/info
-PROJ_mandir     := $(DESTDIR)$(PROJ_prefix)/share/man
+PROJ_bindir     := $(PROJ_prefix)/bin
+PROJ_libdir     := $(PROJ_prefix)/lib
+PROJ_datadir    := $(PROJ_prefix)/share
+PROJ_docsdir    := $(PROJ_prefix)/docs/llvm
+PROJ_etcdir     := $(PROJ_prefix)/etc/llvm
+PROJ_includedir := $(PROJ_prefix)/include
+PROJ_infodir    := $(PROJ_prefix)/info
+PROJ_mandir     := $(PROJ_prefix)/share/man
 
 # Determine if we're on a unix type operating system
 LLVM_ON_UNIX:=@LLVM_ON_UNIX@
diff --git a/Makefile.rules b/Makefile.rules
index b5b35256b794..fcddd50c3a28 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -781,7 +781,7 @@ ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
 # in the file so they get built before dependencies
 #---------------------------------------------------------
 
-$(PROJ_bindir) $(PROJ_libdir) $(PROJ_includedir) $(PROJ_etcdir)::
+$(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $(DESTDIR)$(PROJ_etcdir)::
 	$(Verb) $(MKDIR) $@
 
 # To create other directories, as needed, and timestamp their creation
@@ -904,22 +904,22 @@ install-local::
 uninstall-local::
 	$(Echo) UnInstall circumvented with NO_INSTALL
 else
-install-local:: $(PROJ_etcdir) $(CONFIG_FILES)
-	$(Echo) Installing Configuration Files To $(PROJ_etcdir)
+install-local:: $(DESTDIR)$(PROJ_etcdir) $(CONFIG_FILES)
+	$(Echo) Installing Configuration Files To $(DESTDIR)$(PROJ_etcdir)
 	$(Verb)for file in $(CONFIG_FILES); do \
           if test -f $(PROJ_OBJ_DIR)/$${file} ; then \
-            $(DataInstall) $(PROJ_OBJ_DIR)/$${file} $(PROJ_etcdir) ; \
+            $(DataInstall) $(PROJ_OBJ_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
           elif test -f $(PROJ_SRC_DIR)/$${file} ; then \
-            $(DataInstall) $(PROJ_SRC_DIR)/$${file} $(PROJ_etcdir) ; \
+            $(DataInstall) $(PROJ_SRC_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
           else \
             $(ECHO) Error: cannot find config file $${file}. ; \
           fi \
 	done
 
 uninstall-local::
-	$(Echo) Uninstalling Configuration Files From $(PROJ_etcdir)
+	$(Echo) Uninstalling Configuration Files From $(DESTDIR)$(PROJ_etcdir)
 	$(Verb)for file in $(CONFIG_FILES); do \
-	  $(RM) -f $(PROJ_etcdir)/$${file} ; \
+	  $(RM) -f $(DESTDIR)$(PROJ_etcdir)/$${file} ; \
 	done
 endif
 
@@ -1011,7 +1011,7 @@ endif
 ifdef BYTECODE_DESTINATION
 ModuleDestDir := $(BYTECODE_DESTINATION)
 else
-ModuleDestDir := $(PROJ_libdir)
+ModuleDestDir := $(DESTDIR)$(PROJ_libdir)
 endif
 
 ifdef NO_INSTALL
@@ -1090,17 +1090,17 @@ install-local::
 uninstall-local::
 	$(Echo) Uninstall circumvented with NO_INSTALL
 else
-DestSharedLib = $(PROJ_libdir)/lib$(LIBRARYNAME)$(SHLIBEXT)
+DestSharedLib = $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME)$(SHLIBEXT)
 
 install-local:: $(DestSharedLib)
 
-$(DestSharedLib): $(LibName.SO) $(PROJ_libdir)
+$(DestSharedLib): $(LibName.SO) $(DESTDIR)$(PROJ_libdir)
 	$(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib)
 	$(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib)
 
 uninstall-local::
 	$(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib)
-	-$(Verb) $(RM) -f $(PROJ_libdir)/lib$(LIBRARYNAME).*
+	-$(Verb) $(RM) -f $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).*
 endif
 endif
 
@@ -1144,7 +1144,7 @@ endif
 ifdef BYTECODE_DESTINATION
 BytecodeDestDir := $(BYTECODE_DESTINATION)
 else
-BytecodeDestDir := $(PROJ_libdir)
+BytecodeDestDir := $(DESTDIR)$(PROJ_libdir)
 endif
 
 DestBytecodeLib = $(BytecodeDestDir)/lib$(LIBRARYNAME).bca
@@ -1209,13 +1209,13 @@ install-local::
 uninstall-local::
 	$(Echo) Uninstall circumvented with NO_INSTALL
 else
-DestArchiveLib := $(PROJ_libdir)/lib$(LIBRARYNAME).a
+DestArchiveLib := $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).a
 
 install-local:: $(DestArchiveLib)
 
-$(DestArchiveLib): $(LibName.A) $(PROJ_libdir)
+$(DestArchiveLib): $(LibName.A) $(DESTDIR)$(PROJ_libdir)
 	$(Echo) Installing $(BuildMode) Archive Library $(DestArchiveLib)
-	$(Verb) $(MKDIR) $(PROJ_libdir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_libdir)
 	$(Verb) $(INSTALL) $(LibName.A) $(DestArchiveLib)
 
 uninstall-local::
@@ -1315,11 +1315,11 @@ install-local::
 uninstall-local::
 	$(Echo) Uninstall circumvented with NO_INSTALL
 else
-DestTool = $(PROJ_bindir)/$(TOOLEXENAME)
+DestTool = $(DESTDIR)$(PROJ_bindir)/$(TOOLEXENAME)
 
 install-local:: $(DestTool)
 
-$(DestTool): $(ToolBuildPath) $(PROJ_bindir)
+$(DestTool): $(ToolBuildPath) $(DESTDIR)$(PROJ_bindir)
 	$(Echo) Installing $(BuildMode) $(DestTool)
 	$(Verb) $(ProgInstall) $(ToolBuildPath) $(DestTool)
 
@@ -1964,25 +1964,25 @@ uninstall-local::
 else
 install-local::
 	$(Echo) Installing include files
-	$(Verb) $(MKDIR) $(PROJ_includedir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir)
 	$(Verb) if test -d "$(PROJ_SRC_ROOT)/include" ; then \
 	  cd $(PROJ_SRC_ROOT)/include && \
 	  for  hdr in `find . -type f '!' '(' -name '*~' \
 	      -o -name '.#*' -o -name '*.in' ')' -print | grep -v CVS | \
 	      grep -v .svn` ; do \
-	    instdir=`dirname "$(PROJ_includedir)/$$hdr"` ; \
+	    instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \
 	    if test \! -d "$$instdir" ; then \
 	      $(EchoCmd) Making install directory $$instdir ; \
 	      $(MKDIR) $$instdir ;\
 	    fi ; \
-	    $(DataInstall) $$hdr $(PROJ_includedir)/$$hdr ; \
+	    $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \
 	  done ; \
 	fi
 ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
 	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/include" ; then \
 	  cd $(PROJ_OBJ_ROOT)/include && \
 	  for hdr in `find . -type f -print | grep -v CVS` ; do \
-	    $(DataInstall) $$hdr $(PROJ_includedir)/$$hdr ; \
+	    $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \
 	  done ; \
 	fi
 endif
@@ -1994,10 +1994,10 @@ uninstall-local::
 	    $(RM) -f `find . -path '*/Internal' -prune -o '(' -type f \
 	      '!' '(' -name '*~' -o -name '.#*' \
         -o -name '*.in' ')' -print ')' | \
-        grep -v CVS | sed 's#^#$(PROJ_includedir)/#'` ; \
+        grep -v CVS | sed 's#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \
 	  cd $(PROJ_SRC_ROOT)/include && \
 	    $(RM) -f `find . -path '*/Internal' -prune -o '(' -type f -name '*.in' \
-      -print ')' | sed 's#\.in$$##;s#^#$(PROJ_includedir)/#'` ; \
+      -print ')' | sed 's#\.in$$##;s#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \
 	fi
 endif
 endif
diff --git a/bindings/ocaml/bitwriter/bitwriter_ocaml.c b/bindings/ocaml/bitwriter/bitwriter_ocaml.c
index 41aca259ca7e..53c93cbdfe9b 100644
--- a/bindings/ocaml/bitwriter/bitwriter_ocaml.c
+++ b/bindings/ocaml/bitwriter/bitwriter_ocaml.c
@@ -28,3 +28,18 @@ CAMLprim value llvm_write_bitcode_file(value M, value Path) {
   int res = LLVMWriteBitcodeToFile((LLVMModuleRef) M, String_val(Path));
   return Val_bool(res == 0);
 }
+
+/* ?unbuffered:bool -> Llvm.llmodule -> Unix.file_descr -> bool */
+CAMLprim value llvm_write_bitcode_to_fd(value U, value M, value FD) {
+  int Unbuffered;
+  int res;
+
+  if (U == Val_int(0)) { /* U is None: ?unbuffered was omitted, so use 0 */
+    Unbuffered = 0;
+  } else { /* U is Some b: field 0 of the block holds the bool */
+    Unbuffered = Bool_val(Field(U,0));
+  }
+
+  res = LLVMWriteBitcodeToFD((LLVMModuleRef) M, Int_val(FD), 0, Unbuffered);
+  return Val_bool(res == 0); /* true iff the C API returned 0 */
+}
diff --git a/bindings/ocaml/bitwriter/llvm_bitwriter.ml b/bindings/ocaml/bitwriter/llvm_bitwriter.ml
index 7b45c53dbf82..3e69a3cc8fbb 100644
--- a/bindings/ocaml/bitwriter/llvm_bitwriter.ml
+++ b/bindings/ocaml/bitwriter/llvm_bitwriter.ml
@@ -16,3 +16,10 @@
 (* Writes the bitcode for module the given path. Returns true if successful. *)
 external write_bitcode_file : Llvm.llmodule -> string -> bool
                             = "llvm_write_bitcode_file"
+
+external write_bitcode_to_fd : ?unbuffered:bool -> Llvm.llmodule
+                               -> Unix.file_descr -> bool
+                             = "llvm_write_bitcode_to_fd"
+
+let output_bitcode ?unbuffered channel m =
+  write_bitcode_to_fd ?unbuffered m (Unix.descr_of_out_channel channel)
diff --git a/bindings/ocaml/bitwriter/llvm_bitwriter.mli b/bindings/ocaml/bitwriter/llvm_bitwriter.mli
index 2f782a1d9f6c..ea9a87600a75 100644
--- a/bindings/ocaml/bitwriter/llvm_bitwriter.mli
+++ b/bindings/ocaml/bitwriter/llvm_bitwriter.mli
@@ -16,3 +16,15 @@
     [path]. Returns [true] if successful, [false] otherwise. *)
 external write_bitcode_file : Llvm.llmodule -> string -> bool
                             = "llvm_write_bitcode_file"
+
+(** [write_bitcode_to_fd ~unbuffered m fd] writes the bitcode for module [m]
+    to the file descriptor [fd]. If [unbuffered] is [true], after every write
+    the fd will be flushed. Returns [true] if successful, [false] otherwise. *)
+external write_bitcode_to_fd : ?unbuffered:bool -> Llvm.llmodule
+                               -> Unix.file_descr -> bool
+                             = "llvm_write_bitcode_to_fd"
+
+(** [output_bitcode ~unbuffered c m] writes the bitcode for module [m]
+    to the channel [c]. If [unbuffered] is [true], after every write the fd
+    will be flushed. Returns [true] if successful, [false] otherwise. *)
+val output_bitcode : ?unbuffered:bool -> out_channel -> Llvm.llmodule -> bool
diff --git a/bindings/ocaml/executionengine/executionengine_ocaml.c b/bindings/ocaml/executionengine/executionengine_ocaml.c
index 1d3e57a705b8..5b1e32efefcd 100644
--- a/bindings/ocaml/executionengine/executionengine_ocaml.c
+++ b/bindings/ocaml/executionengine/executionengine_ocaml.c
@@ -91,7 +91,7 @@ CAMLprim value llvm_genericvalue_of_float(LLVMTypeRef Ty, value N) {
 }
 
 /* 'a -> t */
-CAMLprim value llvm_genericvalue_of_value(value V) {
+CAMLprim value llvm_genericvalue_of_pointer(value V) {
   CAMLparam1(V);
   CAMLreturn(alloc_generic_value(LLVMCreateGenericValueOfPointer(Op_val(V))));
 }
@@ -130,7 +130,7 @@ CAMLprim value llvm_genericvalue_as_float(LLVMTypeRef Ty, value GenVal) {
 }
 
 /* t -> 'a */
-CAMLprim value llvm_genericvalue_as_value(value GenVal) {
+CAMLprim value llvm_genericvalue_as_pointer(value GenVal) {
   return Val_op(LLVMGenericValueToPointer(Genericvalue_val(GenVal)));
 }
 
@@ -204,14 +204,14 @@ CAMLprim value llvm_ee_dispose(LLVMExecutionEngineRef EE) {
 }
 
 /* llmodule -> ExecutionEngine.t -> unit */
-CAMLprim value llvm_ee_add_mp(LLVMModuleRef M, LLVMExecutionEngineRef EE) {
+CAMLprim value llvm_ee_add_module(LLVMModuleRef M, LLVMExecutionEngineRef EE) {
   LLVMAddModule(EE, M);
   return Val_unit;
 }
 
 /* llmodule -> ExecutionEngine.t -> llmodule */
-CAMLprim LLVMModuleRef llvm_ee_remove_mp(LLVMModuleRef M,
-                                         LLVMExecutionEngineRef EE) {
+CAMLprim LLVMModuleRef llvm_ee_remove_module(LLVMModuleRef M,
+                                             LLVMExecutionEngineRef EE) {
   LLVMModuleRef RemovedModule;
   char *Error;
   if (LLVMRemoveModule(EE, M, &RemovedModule, &Error))
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.ml b/bindings/ocaml/executionengine/llvm_executionengine.ml
index 95faa58cc548..a8535b246404 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.ml
+++ b/bindings/ocaml/executionengine/llvm_executionengine.ml
@@ -20,7 +20,7 @@ module GenericValue = struct
   external of_float: Llvm.lltype -> float -> t
     = "llvm_genericvalue_of_float"
   external of_pointer: 'a -> t
-    = "llvm_genericvalue_of_value"
+    = "llvm_genericvalue_of_pointer"
   external of_int32: Llvm.lltype -> int32 -> t
     = "llvm_genericvalue_of_int32"
   external of_int: Llvm.lltype -> int -> t
@@ -33,7 +33,7 @@ module GenericValue = struct
   external as_float: Llvm.lltype -> t -> float
     = "llvm_genericvalue_as_float"
   external as_pointer: t -> 'a
-    = "llvm_genericvalue_as_value"
+    = "llvm_genericvalue_as_pointer"
   external as_int32: t -> int32
     = "llvm_genericvalue_as_int32"
   external as_int: t -> int
@@ -65,9 +65,9 @@ module ExecutionEngine = struct
   external dispose: t -> unit
     = "llvm_ee_dispose"
   external add_module: Llvm.llmodule -> t -> unit
-    = "llvm_ee_add_mp"
+    = "llvm_ee_add_module"
   external remove_module: Llvm.llmodule -> t -> Llvm.llmodule
-    = "llvm_ee_remove_mp"
+    = "llvm_ee_remove_module"
   external find_function: string -> t -> Llvm.llvalue option
     = "llvm_ee_find_function"
   external run_function: Llvm.llvalue -> GenericValue.t array -> t ->
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.mli b/bindings/ocaml/executionengine/llvm_executionengine.mli
index ac6665b2bc85..ce25f9d0ae09 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.mli
+++ b/bindings/ocaml/executionengine/llvm_executionengine.mli
@@ -25,57 +25,58 @@ module GenericValue: sig
   (** [of_float fpty n] boxes the float [n] in a float-valued generic value
       according to the floating point type [fpty]. See the fields
       [llvm::GenericValue::DoubleVal] and [llvm::GenericValue::FloatVal]. *)
-  val of_float: Llvm.lltype -> float -> t
+  external of_float : Llvm.lltype -> float -> t = "llvm_genericvalue_of_float"
   
   (** [of_pointer v] boxes the pointer value [v] in a generic value. See the
       field [llvm::GenericValue::PointerVal]. *)
-  val of_pointer: 'a -> t
+  external of_pointer : 'a -> t = "llvm_genericvalue_of_pointer"
   
   (** [of_int32 n w] boxes the int32 [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  val of_int32: Llvm.lltype -> int32 -> t
+  external of_int32 : Llvm.lltype -> int32 -> t = "llvm_genericvalue_of_int32"
   
   (** [of_int n w] boxes the int [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  val of_int: Llvm.lltype -> int -> t
+  external of_int : Llvm.lltype -> int -> t = "llvm_genericvalue_of_int"
   
   (** [of_natint n w] boxes the native int [i] in a generic value with the
       bitwidth [w]. See the field [llvm::GenericValue::IntVal]. *)
-  val of_nativeint: Llvm.lltype -> nativeint -> t
-  
+  external of_nativeint : Llvm.lltype -> nativeint -> t
+                        = "llvm_genericvalue_of_nativeint"
+
   (** [of_int64 n w] boxes the int64 [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  val of_int64: Llvm.lltype -> int64 -> t
-  
+  external of_int64 : Llvm.lltype -> int64 -> t = "llvm_genericvalue_of_int64"
+
   (** [as_float fpty gv] unboxes the floating point-valued generic value [gv] of
       floating point type [fpty]. See the fields [llvm::GenericValue::DoubleVal]
       and [llvm::GenericValue::FloatVal]. *)
-  val as_float: Llvm.lltype -> t -> float
+  external as_float : Llvm.lltype -> t -> float = "llvm_genericvalue_as_float"
   
   (** [as_pointer gv] unboxes the pointer-valued generic value [gv]. See the
       field [llvm::GenericValue::PointerVal]. *)
-  val as_pointer: t -> 'a
+  external as_pointer : t -> 'a = "llvm_genericvalue_as_pointer"
   
   (** [as_int32 gv] unboxes the integer-valued generic value [gv] as an [int32].
       Is invalid if [gv] has a bitwidth greater than 32 bits. See the field
       [llvm::GenericValue::IntVal]. *)
-  val as_int32: t -> int32
+  external as_int32 : t -> int32 = "llvm_genericvalue_as_int32"
   
   (** [as_int gv] unboxes the integer-valued generic value [gv] as an [int].
       Is invalid if [gv] has a bitwidth greater than the host bit width (but the
       most significant bit may be lost). See the field
       [llvm::GenericValue::IntVal]. *)
-  val as_int: t -> int
+  external as_int : t -> int = "llvm_genericvalue_as_int"
   
   (** [as_natint gv] unboxes the integer-valued generic value [gv] as a
       [nativeint]. Is invalid if [gv] has a bitwidth greater than
       [nativeint]. See the field [llvm::GenericValue::IntVal]. *)
-  val as_nativeint: t -> nativeint
+  external as_nativeint : t -> nativeint = "llvm_genericvalue_as_nativeint"
   
   (** [as_int64 gv] returns the integer-valued generic value [gv] as an [int64].
       Is invalid if [gv] has a bitwidth greater than [int64]. See the field
       [llvm::GenericValue::IntVal]. *)
-  val as_int64: t -> int64
+  external as_int64 : t -> int64 = "llvm_genericvalue_as_int64"
 end
 
 
@@ -90,66 +91,72 @@ module ExecutionEngine: sig
       interpreter. Raises [Error msg] if an error occurrs. The execution engine
       is not garbage collected and must be destroyed with [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  val create: Llvm.llmodule -> t
+  external create : Llvm.llmodule -> t = "llvm_ee_create"
   
   (** [create_interpreter m] creates a new interpreter, taking ownership of the
       module [m] if successful. Raises [Error msg] if an error occurrs. The
       execution engine is not garbage collected and must be destroyed with
       [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  val create_interpreter: Llvm.llmodule -> t
+  external create_interpreter : Llvm.llmodule -> t = "llvm_ee_create_interpreter"
   
   (** [create_jit m optlevel] creates a new JIT (just-in-time compiler), taking
       ownership of the module [m] if successful with the desired optimization
       level [optlevel]. Raises [Error msg] if an error occurrs. The execution
       engine is not garbage collected and must be destroyed with [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  val create_jit : Llvm.llmodule -> int -> t
+  external create_jit : Llvm.llmodule -> int -> t = "llvm_ee_create_jit"
 
   (** [dispose ee] releases the memory used by the execution engine and must be
       invoked to avoid memory leaks. *)
-  val dispose: t -> unit
+  external dispose : t -> unit = "llvm_ee_dispose"
   
   (** [add_module m ee] adds the module [m] to the execution engine [ee]. *)
-  val add_module: Llvm.llmodule -> t -> unit
+  external add_module : Llvm.llmodule -> t -> unit = "llvm_ee_add_module"
   
   (** [remove_module m ee] removes the module [m] from the execution engine
       [ee], disposing of [m] and the module referenced by [mp]. Raises
       [Error msg] if an error occurs. *)
-  val remove_module: Llvm.llmodule -> t -> Llvm.llmodule
+  external remove_module : Llvm.llmodule -> t -> Llvm.llmodule
+                         = "llvm_ee_remove_module"
   
   (** [find_function n ee] finds the function named [n] defined in any of the
       modules owned by the execution engine [ee]. Returns [None] if the function
       is not found and [Some f] otherwise. *)
-  val find_function: string -> t -> Llvm.llvalue option
+  external find_function : string -> t -> Llvm.llvalue option
+                         = "llvm_ee_find_function"
   
   (** [run_function f args ee] synchronously executes the function [f] with the
       arguments [args], which must be compatible with the parameter types. *)
-  val run_function: Llvm.llvalue -> GenericValue.t array -> t ->
-                    GenericValue.t
+  external run_function : Llvm.llvalue -> GenericValue.t array -> t ->
+                     GenericValue.t
+                   = "llvm_ee_run_function"
   
   (** [run_static_ctors ee] executes the static constructors of each module in
       the execution engine [ee]. *)
-  val run_static_ctors: t -> unit
+  external run_static_ctors : t -> unit = "llvm_ee_run_static_ctors"
   
   (** [run_static_dtors ee] executes the static destructors of each module in
       the execution engine [ee]. *)
-  val run_static_dtors: t -> unit
+  external run_static_dtors : t -> unit = "llvm_ee_run_static_dtors"
   
   (** [run_function_as_main f args env ee] executes the function [f] as a main
       function, passing it [argv] and [argc] according to the string array
       [args], and [envp] as specified by the array [env]. Returns the integer
       return value of the function. *)
-  val run_function_as_main: Llvm.llvalue -> string array ->
-                            (string * string) array -> t -> int
+  external run_function_as_main : Llvm.llvalue -> string array ->
+                                  (string * string) array -> t -> int
+                                = "llvm_ee_run_function_as_main"
   
   (** [free_machine_code f ee] releases the memory in the execution engine [ee]
       used to store the machine code for the function [f]. *)
-  val free_machine_code: Llvm.llvalue -> t -> unit
+  external free_machine_code : Llvm.llvalue -> t -> unit
+                             = "llvm_ee_free_machine_code"
 
   (** [target_data ee] is the target data owned by the execution engine
       [ee]. *)
-  val target_data: t -> Llvm_target.TargetData.t
+  external target_data : t -> Llvm_target.TargetData.t
+                       = "LLVMGetExecutionEngineTargetData"
 end
 
 external initialize_native_target : unit -> bool
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 407c1fc6c63f..e801c494713a 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -90,11 +90,13 @@ module Attribute = struct
   | Optsize
   | Ssp
   | Sspreq
+  | Alignment
   | Nocapture
   | Noredzone
   | Noimplicitfloat
   | Naked
   | Inlinehint
+  | Stackalignment
 end
 
 module Icmp = struct
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index aa5ea760c46d..4b0c06da03e4 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -139,11 +139,13 @@ module Attribute : sig
   | Optsize
   | Ssp
   | Sspreq
+  | Alignment
   | Nocapture
   | Noredzone
   | Noimplicitfloat
   | Naked
   | Inlinehint
+  | Stackalignment
 end
 
 (** The predicate for an integer comparison ([icmp]) instruction.
diff --git a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
index b4563b716a82..5699152b5a92 100644
--- a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
+++ b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
@@ -10,9 +10,38 @@
 external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t
                                     -> unit
                                   = "llvm_add_constant_propagation"
-external add_instruction_combining : [<Llvm.PassManager.any] Llvm.PassManager.t
-                                     -> unit
-                                   = "llvm_add_instruction_combining"
+external add_sccp : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                  = "llvm_add_sccp"
+external add_dead_store_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                      -> unit
+                                    = "llvm_add_dead_store_elimination"
+external add_aggressive_dce : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_aggressive_dce"
+external
+add_scalar_repl_aggregation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_scalar_repl_aggregation"
+external add_ind_var_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                      -> unit
+                                    = "llvm_add_ind_var_simplification"
+external
+add_instruction_combination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_instruction_combination"
+external add_licm : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_licm"
+external add_loop_unswitch : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_unswitch"
+external add_loop_unroll : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_unroll"
+external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_rotation"
+external add_loop_index_split : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_index_split"
 external
 add_memory_to_register_promotion : [<Llvm.PassManager.any] Llvm.PassManager.t
                                    -> unit
@@ -21,12 +50,26 @@ external
 add_memory_to_register_demotion : [<Llvm.PassManager.any] Llvm.PassManager.t
                                   -> unit
                                 = "llvm_add_memory_to_register_demotion"
-external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t
-                             -> unit
+external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
                            = "llvm_add_reassociation"
-external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t
-                   -> unit
-                 = "llvm_add_gvn"
+external add_jump_threading : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_jump_threading"
 external add_cfg_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
                                   -> unit
                                 = "llvm_add_cfg_simplification"
+external
+add_tail_call_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                          = "llvm_add_tail_call_elimination" 
+external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t
+                   -> unit
+                 = "llvm_add_gvn"
+external add_memcpy_opt : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_memcpy_opt"
+external add_loop_deletion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_deletion"
+external
+add_lib_call_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_lib_call_simplification"
diff --git a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
index 6fcce0432d71..9f95fbce9f89 100644
--- a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
+++ b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
@@ -17,10 +17,59 @@ external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t
                                     -> unit
                                   = "llvm_add_constant_propagation"
 
+(** See the [llvm::createSCCPPass] function. *)
+external add_sccp : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                  = "llvm_add_sccp"
+
+(** See the [llvm::createDeadStoreEliminationPass] function. *)
+external add_dead_store_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                      -> unit
+                                    = "llvm_add_dead_store_elimination"
+
+(** See the [llvm::createAggressiveDCEPass] function. *)
+external add_aggressive_dce : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_aggressive_dce"
+
+(** See the [llvm::createScalarReplAggregatesPass] function. *)
+external
+add_scalar_repl_aggregation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_scalar_repl_aggregation"
+
+(** See the [llvm::createIndVarSimplifyPass] function. *)
+external add_ind_var_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                      -> unit
+                                    = "llvm_add_ind_var_simplification"
+
 (** See the [llvm::createInstructionCombiningPass] function. *)
-external add_instruction_combining : [<Llvm.PassManager.any] Llvm.PassManager.t
-                                     -> unit
-                                   = "llvm_add_instruction_combining"
+external
+add_instruction_combination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_instruction_combination"
+
+(** See the [llvm::createLICMPass] function. *)
+external add_licm : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_licm"
+
+(** See the [llvm::createLoopUnswitchPass] function. *)
+external add_loop_unswitch : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_unswitch"
+
+(** See the [llvm::createLoopUnrollPass] function. *)
+external add_loop_unroll : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_unroll"
+
+(** See the [llvm::createLoopRotatePass] function. *)
+external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_rotation"
+
+(** See the [llvm::createLoopIndexSplitPass] function. *)
+external add_loop_index_split : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_loop_index_split"
 
 (** See the [llvm::createPromoteMemoryToRegisterPass] function. *)
 external
@@ -35,16 +84,40 @@ add_memory_to_register_demotion : [<Llvm.PassManager.any] Llvm.PassManager.t
                                 = "llvm_add_memory_to_register_demotion"
 
 (** See the [llvm::createReassociatePass] function. *)
-external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t
-                             -> unit
+external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
                            = "llvm_add_reassociation"
 
-(** See the [llvm::createGVNPass] function. *)
-external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t
-                   -> unit
-                 = "llvm_add_gvn"
+(** See the [llvm::createJumpThreadingPass] function. *)
+external add_jump_threading : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_jump_threading"
 
 (** See the [llvm::createCFGSimplificationPass] function. *)
 external add_cfg_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
                                   -> unit
                                 = "llvm_add_cfg_simplification"
+
+(** See the [llvm::createTailCallEliminationPass] function. *)
+external
+add_tail_call_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                          = "llvm_add_tail_call_elimination" 
+
+(** See the [llvm::createGVNPass] function. *)
+external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t
+                   -> unit
+                 = "llvm_add_gvn"
+
+(** See the [llvm::createMemCpyOptPass] function. *)
+external add_memcpy_opt : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                -> unit
+                              = "llvm_add_memcpy_opt"
+
+(** See the [llvm::createLoopDeletionPass] function. *)
+external add_loop_deletion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_deletion"
+
+(** See the [llvm::createSimplifyLibCallsPass] function. *)
+external
+add_lib_call_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                            = "llvm_add_lib_call_simplification"
diff --git a/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c b/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
index 87c106098432..c20bdde5753a 100644
--- a/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
+++ b/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
@@ -26,12 +26,72 @@ CAMLprim value llvm_add_constant_propagation(LLVMPassManagerRef PM) {
 }
 
 /* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
-CAMLprim value llvm_add_instruction_combining(LLVMPassManagerRef PM) {
+CAMLprim value llvm_add_sccp(LLVMPassManagerRef PM) {
+  LLVMAddSCCPPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_dead_store_elimination(LLVMPassManagerRef PM) {
+  LLVMAddDeadStoreEliminationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_aggressive_dce(LLVMPassManagerRef PM) {
+  LLVMAddAggressiveDCEPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_scalar_repl_aggregation(LLVMPassManagerRef PM) {
+  LLVMAddScalarReplAggregatesPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_ind_var_simplification(LLVMPassManagerRef PM) {
+  LLVMAddIndVarSimplifyPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_instruction_combination(LLVMPassManagerRef PM) {
   LLVMAddInstructionCombiningPass(PM);
   return Val_unit;
 }
 
 /* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_licm(LLVMPassManagerRef PM) {
+  LLVMAddLICMPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_unswitch(LLVMPassManagerRef PM) {
+  LLVMAddLoopUnswitchPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_unroll(LLVMPassManagerRef PM) {
+  LLVMAddLoopUnrollPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_rotation(LLVMPassManagerRef PM) {
+  LLVMAddLoopRotatePass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_index_split(LLVMPassManagerRef PM) {
+  LLVMAddLoopIndexSplitPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
 CAMLprim value llvm_add_memory_to_register_promotion(LLVMPassManagerRef PM) {
   LLVMAddPromoteMemoryToRegisterPass(PM);
   return Val_unit;
@@ -50,8 +110,8 @@ CAMLprim value llvm_add_reassociation(LLVMPassManagerRef PM) {
 }
 
 /* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
-CAMLprim value llvm_add_gvn(LLVMPassManagerRef PM) {
-  LLVMAddGVNPass(PM);
+CAMLprim value llvm_add_jump_threading(LLVMPassManagerRef PM) {
+  LLVMAddJumpThreadingPass(PM);
   return Val_unit;
 }
 
@@ -60,3 +120,33 @@ CAMLprim value llvm_add_cfg_simplification(LLVMPassManagerRef PM) {
   LLVMAddCFGSimplificationPass(PM);
   return Val_unit;
 }
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_tail_call_elimination(LLVMPassManagerRef PM) {
+  LLVMAddTailCallEliminationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_gvn(LLVMPassManagerRef PM) {
+  LLVMAddGVNPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_memcpy_opt(LLVMPassManagerRef PM) {
+  LLVMAddMemCpyOptPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_deletion(LLVMPassManagerRef PM) {
+  LLVMAddLoopDeletionPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_lib_call_simplification(LLVMPassManagerRef PM) {
+  LLVMAddSimplifyLibCallsPass(PM);
+  return Val_unit;
+}
diff --git a/docs/CommandGuide/Makefile b/docs/CommandGuide/Makefile
index 3b6518310726..2c2d0760e799 100644
--- a/docs/CommandGuide/Makefile
+++ b/docs/CommandGuide/Makefile
@@ -77,9 +77,9 @@ EXTRA_DIST := $(POD) index.html
 clean-local::
 	$(Verb) $(RM) -f pod2htm*.*~~ $(HTML) $(MAN) $(PS)
 
-HTML_DIR := $(PROJ_docsdir)/html/CommandGuide
-MAN_DIR  := $(PROJ_mandir)/man1
-PS_DIR   := $(PROJ_docsdir)/ps
+HTML_DIR := $(DESTDIR)$(PROJ_docsdir)/html/CommandGuide
+MAN_DIR  := $(DESTDIR)$(PROJ_mandir)/man1
+PS_DIR   := $(DESTDIR)$(PROJ_docsdir)/ps
 
 install-local:: $(HTML) $(INSTALL_MANS) $(PS)
 	$(Echo) Installing HTML CommandGuide Documentation
diff --git a/docs/LangRef.html b/docs/LangRef.html
index f3f73fa4f417..f7ecb76195e8 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -2520,6 +2520,23 @@ call void asm alignstack "eieio", ""()
    metadata nodes, which can be looked up in the module symbol table. For
    example: "<tt>!foo =  metadata !{!4, !3}</tt>".
 
+<p>Metadata can be used as function arguments. Here the
+   <tt>llvm.dbg.value</tt> function uses two metadata arguments.</p>
+
+   <div class="doc_code">
+     <pre>
+       call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+     </pre>
+   </div>
+
+<p>Metadata can be attached to an instruction. Here metadata <tt>!21</tt> is
+   attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
+   identifier.</p>
+  <div class="doc_code">
+    <pre>
+      %indvar.next = add i64 %indvar, 1, !dbg !21
+    </pre>
+  </div>
 </div>
 
 
@@ -7453,7 +7470,7 @@ LLVM</a>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-03-02 07:36:51 +0100 (Tue, 02 Mar 2010) $
+  Last modified: $Date: 2010-03-05 00:44:48 +0100 (Fri, 05 Mar 2010) $
 </address>
 
 </body>
diff --git a/docs/Makefile b/docs/Makefile
index 5bfa6c3cfb3f..8f7d6171d3b3 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -48,11 +48,11 @@ generated:: doxygen ocamldoc
 
 install-html: $(PROJ_OBJ_DIR)/html.tar.gz
 	$(Echo) Installing HTML documentation
-	$(Verb) $(MKDIR) $(PROJ_docsdir)/html
-	$(Verb) $(MKDIR) $(PROJ_docsdir)/html/img
-	$(Verb) $(DataInstall) $(HTML) $(PROJ_docsdir)/html
-	$(Verb) $(DataInstall) $(IMAGES) $(PROJ_docsdir)/html/img
-	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/html.tar.gz $(PROJ_docsdir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html/img
+	$(Verb) $(DataInstall) $(HTML) $(DESTDIR)$(PROJ_docsdir)/html
+	$(Verb) $(DataInstall) $(IMAGES) $(DESTDIR)$(PROJ_docsdir)/html/img
+	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/html.tar.gz $(DESTDIR)$(PROJ_docsdir)
 
 $(PROJ_OBJ_DIR)/html.tar.gz: $(HTML)
 	$(Echo) Packaging HTML documentation
@@ -63,11 +63,11 @@ $(PROJ_OBJ_DIR)/html.tar.gz: $(HTML)
 
 install-doxygen: doxygen
 	$(Echo) Installing doxygen documentation
-	$(Verb) $(MKDIR) $(PROJ_docsdir)/html/doxygen
-	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(PROJ_docsdir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html/doxygen
+	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(DESTDIR)$(PROJ_docsdir)
 	$(Verb) cd $(PROJ_OBJ_DIR)/doxygen && \
 	  $(FIND) . -type f -exec \
-	    $(DataInstall) {} $(PROJ_docsdir)/html/doxygen \;
+	    $(DataInstall) {} $(DESTDIR)$(PROJ_docsdir)/html/doxygen \;
 
 doxygen: regendoc $(PROJ_OBJ_DIR)/doxygen.tar.gz
 
@@ -94,11 +94,11 @@ $(LLVM_SRC_ROOT)/docs/userloc.html:
 
 install-ocamldoc: ocamldoc
 	$(Echo) Installing ocamldoc documentation
-	$(Verb) $(MKDIR) $(PROJ_docsdir)/ocamldoc/html
-	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_docsdir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/ocamldoc/html
+	$(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(DESTDIR)$(PROJ_docsdir)
 	$(Verb) cd $(PROJ_OBJ_DIR)/ocamldoc && \
 	  $(FIND) . -type f -exec \
-	    $(DataInstall) {} $(PROJ_docsdir)/ocamldoc/html \;
+	    $(DataInstall) {} $(DESTDIR)$(PROJ_docsdir)/ocamldoc/html \;
 
 ocamldoc: regen-ocamldoc
 	$(Echo) Packaging ocamldoc documentation
@@ -120,4 +120,4 @@ regen-ocamldoc:
 
 uninstall-local::
 	$(Echo) Uninstalling Documentation
-	$(Verb) $(RM) -rf $(PROJ_docsdir)
+	$(Verb) $(RM) -rf $(DESTDIR)$(PROJ_docsdir)
diff --git a/docs/tutorial/Makefile b/docs/tutorial/Makefile
index 6169bb82416b..9082ad4d8575 100644
--- a/docs/tutorial/Makefile
+++ b/docs/tutorial/Makefile
@@ -12,7 +12,7 @@ include $(LEVEL)/Makefile.common
 
 HTML       := $(wildcard $(PROJ_SRC_DIR)/*.html)
 EXTRA_DIST := $(HTML) index.html
-HTML_DIR   := $(PROJ_docsdir)/html/tutorial
+HTML_DIR   := $(DESTDIR)$(PROJ_docsdir)/html/tutorial
 
 install-local:: $(HTML)
 	$(Echo) Installing HTML Tutorial Documentation
diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h
index 008ff9f2c106..bcbfb111492a 100644
--- a/include/llvm-c/BitWriter.h
+++ b/include/llvm-c/BitWriter.h
@@ -28,13 +28,16 @@ extern "C" {
 
 /*===-- Operations on modules ---------------------------------------------===*/
 
-/* Writes a module to an open file descriptor. Returns 0 on success.
-   Closes the Handle. Use dup first if this is not what you want. */ 
-int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle);
-
-/* Writes a module to the specified path. Returns 0 on success. */ 
+/** Writes a module to the specified path. Returns 0 on success. */ 
 int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path);
 
+/** Writes a module to an open file descriptor. Returns 0 on success. */
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+                         int Unbuffered);
+
+/** Deprecated in favor of LLVMWriteBitcodeToFD. Writes a module to an open
+    file descriptor. Returns 0 on success. Closes the Handle. */ 
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle);
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 457436d12199..733b92c57c8d 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -112,11 +112,13 @@ typedef enum {
     LLVMOptimizeForSizeAttribute = 1<<13,
     LLVMStackProtectAttribute    = 1<<14,
     LLVMStackProtectReqAttribute = 1<<15,
+    LLVMAlignment = 31<<16,
     LLVMNoCaptureAttribute  = 1<<21,
     LLVMNoRedZoneAttribute  = 1<<22,
     LLVMNoImplicitFloatAttribute = 1<<23,
     LLVMNakedAttribute      = 1<<24,
-    LLVMInlineHintAttribute = 1<<25
+    LLVMInlineHintAttribute = 1<<25,
+    LLVMStackAlignment = 7<<26
 } LLVMAttribute;
 
 typedef enum {
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index f325e2b9f309..b5ca374a7ddd 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -139,7 +139,7 @@ public:
   }
 
   V lookup(const K &Key) {
-    return TopLevelMap[Key].getValue();
+    return TopLevelMap[Key]->getValue();
   }
 
   void insert(const K &Key, const V &Val) {
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 7c673c3c2fbd..0791b7bd4c14 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -124,6 +124,10 @@ namespace llvm {
   /// character is included in the result string.
   bool GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset = 0,
                              bool StopAtNul = true);
+                        
+  /// GetStringLength - If we can compute the length of the string pointed to by
+  /// the specified pointer, return 'len+1'.  If we can't, return 0.
+  uint64_t GetStringLength(Value *V);
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 66be26c950a7..d84f882bcd1b 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -419,6 +419,30 @@ private:
   void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo);
 };
 
+/// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare
+/// MachineInstr* by *value* of the instruction rather than by pointer value.
+/// The hashing and equality testing functions ignore definitions so this is
+/// useful for CSE, etc.
+struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr*> {
+  static inline MachineInstr *getEmptyKey() {
+    return 0;
+  }
+
+  static inline MachineInstr *getTombstoneKey() {
+    return reinterpret_cast<MachineInstr*>(-1);
+  }
+
+  static unsigned getHashValue(const MachineInstr* const &MI);
+
+  static bool isEqual(const MachineInstr* const &LHS,
+                      const MachineInstr* const &RHS) {
+    if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
+        LHS == getEmptyKey() || LHS == getTombstoneKey())
+      return LHS == RHS;
+    return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs);
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // Debugging Support
 
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 01dc0184e250..f2e5e102223f 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -115,6 +115,10 @@ public:
   /// register.
   bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); }
 
+  /// hasOneUse - Return true if there is exactly one instruction using the
+  /// specified register.
+  bool hasOneUse(unsigned RegNo) const;
+
   /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
   /// specified register, skipping those marked as Debug.
   typedef defusechain_iterator<true,false,true> use_nodbg_iterator;
@@ -129,6 +133,10 @@ public:
     return use_nodbg_begin(RegNo) == use_nodbg_end();
   }
 
+  /// hasOneNonDBGUse - Return true if there is exactly one non-Debug
+  /// instruction using the specified register.
+  bool hasOneNonDBGUse(unsigned RegNo) const;
+
   /// replaceRegWith - Replace all instances of FromReg with ToReg in the
   /// machine function.  This is like llvm-level X->replaceAllUsesWith(Y),
   /// except that it also changes any definitions of the register as well.
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 23ff001b8d45..d9c1374a01d3 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -276,7 +276,6 @@ private:
   SDNode *Select_UNDEF(SDNode *N);
   SDNode *Select_EH_LABEL(SDNode *N);
   void CannotYetSelect(SDNode *N);
-  void CannotYetSelectIntrinsic(SDNode *N);
 
 private:
   void DoInstructionSelection();
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 950963ea1879..21a0b984b64c 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1027,11 +1027,15 @@ private:
   /// then they will be delete[]'d when the node is destroyed.
   uint16_t OperandsNeedDelete : 1;
 
+  /// HasDebugValue - This tracks whether this node has one or more dbg_value
+  /// nodes corresponding to it.
+  uint16_t HasDebugValue : 1;
+
 protected:
   /// SubclassData - This member is defined by this class, but is not used for
   /// anything.  Subclasses can use it to hold whatever state they find useful.
   /// This field is initialized to zero by the ctor.
-  uint16_t SubclassData : 15;
+  uint16_t SubclassData : 14;
 
 private:
   /// NodeId - Unique id per SDNode in the DAG.
@@ -1094,6 +1098,12 @@ public:
     return ~NodeType;
   }
 
+  /// getHasDebugValue - get this bit.
+  bool getHasDebugValue() const { return HasDebugValue; }
+
+  /// setHasDebugValue - set this bit.
+  void setHasDebugValue(bool b) { HasDebugValue = b; }
+
   /// use_empty - Return true if there are no uses of this node.
   ///
   bool use_empty() const { return UseList == NULL; }
@@ -1357,8 +1367,8 @@ protected:
 
   SDNode(unsigned Opc, const DebugLoc dl, SDVTList VTs, const SDValue *Ops,
          unsigned NumOps)
-    : NodeType(Opc), OperandsNeedDelete(true), SubclassData(0),
-      NodeId(-1),
+    : NodeType(Opc), OperandsNeedDelete(true), HasDebugValue(false),
+      SubclassData(0), NodeId(-1),
       OperandList(NumOps ? new SDUse[NumOps] : 0),
       ValueList(VTs.VTs), UseList(NULL),
       NumOperands(NumOps), NumValues(VTs.NumVTs),
diff --git a/include/llvm/CompilerDriver/Common.td b/include/llvm/CompilerDriver/Common.td
index 479bd6e12f0c..31a627d6273a 100644
--- a/include/llvm/CompilerDriver/Common.td
+++ b/include/llvm/CompilerDriver/Common.td
@@ -45,6 +45,7 @@ def hidden;
 def init;
 def multi_val;
 def one_or_more;
+def zero_or_more;
 def optional;
 def really_hidden;
 def required;
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index c15b55545166..658967d81a34 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -110,36 +110,51 @@ public:
     return ODR ? WeakODRLinkage : WeakAnyLinkage;
   }
 
-  bool hasExternalLinkage() const { return Linkage == ExternalLinkage; }
-  bool hasAvailableExternallyLinkage() const {
+  static bool isExternalLinkage(LinkageTypes Linkage) {
+    return Linkage == ExternalLinkage;
+  }
+  static bool isAvailableExternallyLinkage(LinkageTypes Linkage) {
     return Linkage == AvailableExternallyLinkage;
   }
-  bool hasLinkOnceLinkage() const {
+  static bool isLinkOnceLinkage(LinkageTypes Linkage) {
     return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage;
   }
-  bool hasWeakLinkage() const {
+  static bool isWeakLinkage(LinkageTypes Linkage) {
     return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage;
   }
-  bool hasAppendingLinkage() const { return Linkage == AppendingLinkage; }
-  bool hasInternalLinkage() const { return Linkage == InternalLinkage; }
-  bool hasPrivateLinkage() const { return Linkage == PrivateLinkage; }
-  bool hasLinkerPrivateLinkage() const { return Linkage==LinkerPrivateLinkage; }
-  bool hasLocalLinkage() const {
-    return hasInternalLinkage() || hasPrivateLinkage() ||
-      hasLinkerPrivateLinkage();
+  static bool isAppendingLinkage(LinkageTypes Linkage) {
+    return Linkage == AppendingLinkage;
+  }
+  static bool isInternalLinkage(LinkageTypes Linkage) {
+    return Linkage == InternalLinkage;
+  }
+  static bool isPrivateLinkage(LinkageTypes Linkage) {
+    return Linkage == PrivateLinkage;
+  }
+  static bool isLinkerPrivateLinkage(LinkageTypes Linkage) {
+    return Linkage==LinkerPrivateLinkage;
+  }
+  static bool isLocalLinkage(LinkageTypes Linkage) {
+    return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) ||
+      isLinkerPrivateLinkage(Linkage);
+  }
+  static bool isDLLImportLinkage(LinkageTypes Linkage) {
+    return Linkage == DLLImportLinkage;
+  }
+  static bool isDLLExportLinkage(LinkageTypes Linkage) {
+    return Linkage == DLLExportLinkage;
+  }
+  static bool isExternalWeakLinkage(LinkageTypes Linkage) {
+    return Linkage == ExternalWeakLinkage;
+  }
+  static bool isCommonLinkage(LinkageTypes Linkage) {
+    return Linkage == CommonLinkage;
   }
-  bool hasDLLImportLinkage() const { return Linkage == DLLImportLinkage; }
-  bool hasDLLExportLinkage() const { return Linkage == DLLExportLinkage; }
-  bool hasExternalWeakLinkage() const { return Linkage == ExternalWeakLinkage; }
-  bool hasCommonLinkage() const { return Linkage == CommonLinkage; }
-
-  void setLinkage(LinkageTypes LT) { Linkage = LT; }
-  LinkageTypes getLinkage() const { return Linkage; }
 
   /// mayBeOverridden - Whether the definition of this global may be replaced
   /// by something non-equivalent at link time.  For example, if a function has
   /// weak linkage then the code defining it may be replaced by different code.
-  bool mayBeOverridden() const {
+  static bool mayBeOverridden(LinkageTypes Linkage) {
     return (Linkage == WeakAnyLinkage ||
             Linkage == LinkOnceAnyLinkage ||
             Linkage == CommonLinkage ||
@@ -148,7 +163,7 @@ public:
 
   /// isWeakForLinker - Whether the definition of this global may be replaced at
   /// link time.
-  bool isWeakForLinker() const {
+  static bool isWeakForLinker(LinkageTypes Linkage)  {
     return (Linkage == AvailableExternallyLinkage ||
             Linkage == WeakAnyLinkage ||
             Linkage == WeakODRLinkage ||
@@ -158,6 +173,33 @@ public:
             Linkage == ExternalWeakLinkage);
   }
 
+  bool hasExternalLinkage() const { return isExternalLinkage(Linkage); }
+  bool hasAvailableExternallyLinkage() const {
+    return isAvailableExternallyLinkage(Linkage);
+  }
+  bool hasLinkOnceLinkage() const {
+    return isLinkOnceLinkage(Linkage);
+  }
+  bool hasWeakLinkage() const {
+    return isWeakLinkage(Linkage);
+  }
+  bool hasAppendingLinkage() const { return isAppendingLinkage(Linkage); }
+  bool hasInternalLinkage() const { return isInternalLinkage(Linkage); }
+  bool hasPrivateLinkage() const { return isPrivateLinkage(Linkage); }
+  bool hasLinkerPrivateLinkage() const { return isLinkerPrivateLinkage(Linkage); }
+  bool hasLocalLinkage() const { return isLocalLinkage(Linkage); }
+  bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); }
+  bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); }
+  bool hasExternalWeakLinkage() const { return isExternalWeakLinkage(Linkage); }
+  bool hasCommonLinkage() const { return isCommonLinkage(Linkage); }
+
+  void setLinkage(LinkageTypes LT) { Linkage = LT; }
+  LinkageTypes getLinkage() const { return Linkage; }
+
+  bool mayBeOverridden() const { return mayBeOverridden(Linkage); }
+
+  bool isWeakForLinker() const { return isWeakForLinker(Linkage); }
+
   /// copyAttributesFrom - copy all additional attributes (those not needed to
   /// create a GlobalValue) from the GlobalValue Src to this one.
   virtual void copyAttributesFrom(const GlobalValue *Src);
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 1376e4664c20..881a0fea23ef 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -78,7 +78,9 @@
 
 // ALWAYS_INLINE - On compilers where we have a directive to do so, mark a
 // method "always inline" because it is performance sensitive.
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+// GCC 3.4 supported this but is buggy in various cases and produces
+// unimplemented errors, so only use it with GCC 4.0 and later.
+#if __GNUC__ > 3
 #define ALWAYS_INLINE __attribute__((always_inline))
 #else
 // TODO: No idea how to do this with MSVC.
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index c8aef9c094b3..1f4e598990d6 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -143,6 +143,10 @@ public:
     return Type::getVoidTy(Context);
   }
   
+  const Type *getInt8PtrTy() {
+    return Type::getInt8PtrTy(Context);
+  }
+  
   /// getCurrentFunctionReturnType - Get the return type of the current function
   /// that we're emitting into.
   const Type *getCurrentFunctionReturnType() const;
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
new file mode 100644
index 000000000000..03716a8eed73
--- /dev/null
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -0,0 +1,96 @@
+//===- BuildLibCalls.h - Utility builder for libcalls -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an interface to build some C language libcalls for
+// optimization passes that need to call the various functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_BUILDLIBCALLS_H
+#define TRANSFORMS_UTILS_BUILDLIBCALLS_H
+
+#include "llvm/Support/IRBuilder.h"
+
+namespace llvm {
+  class Value;
+  class TargetData;
+  
+  /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+  Value *CastToCStr(Value *V, IRBuilder<> &B);
+
+  /// EmitStrLen - Emit a call to the strlen function to the builder, for the
+  /// specified pointer.  Ptr is required to be some pointer type, and the
+  /// return value has 'intptr_t' type.
+  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitStrChr - Emit a call to the strchr function to the builder, for the
+  /// specified pointer and character.  Ptr is required to be some pointer type,
+  /// and the return value has 'i8*' type.
+  Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+  /// specified pointer arguments.
+  Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                    const TargetData *TD);
+  
+  /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
+  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+                    unsigned Align, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitMemMove - Emit a call to the memmove function to the builder.  This
+  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+  Value *EmitMemMove(Value *Dst, Value *Src, Value *Len,
+                     unsigned Align, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
+  /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+  Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+                    const TargetData *TD);
+
+  /// EmitMemCmp - Emit a call to the memcmp function.
+  Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                    const TargetData *TD);
+
+  /// EmitMemSet - Emit a call to the memset function
+  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B,
+                    const TargetData *TD);
+
+  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor').  This function is known to take a single argument whose
+  /// type matches 'Op' and to return one value of the same type.  If 'Op' is a
+  /// long double, 'l' is added as the suffix of the name; if 'Op' is a float,
+  /// an 'f' suffix is added.
+  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
+                              const AttrListPtr &Attrs);
+
+  /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
+  /// is an integer.
+  Value *EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitPutS - Emit a call to the puts function.  This assumes that Str is
+  /// some pointer.
+  void EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is
+  /// an i32, and File is a pointer to FILE.
+  void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                 const TargetData *TD);
+
+  /// EmitFPutS - Emit a call to the fputs function.  Str is required to be a
+  /// pointer and File is a pointer to FILE.
+  void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is
+  /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+  void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+                  const TargetData *TD);
+}
+
+#endif
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 1f8053afe949..8288e96eb775 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -194,11 +194,10 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   const Type *ITy = GetCompareTy(LHS);
   
   // icmp X, X -> true/false
-  if (LHS == RHS)
+  // X icmp undef -> true/false.  For example, icmp ugt %X, undef -> false
+  // because X could be 0.
+  if (LHS == RHS || isa<UndefValue>(RHS))
     return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
-  if (isa<UndefValue>(RHS))                  // X icmp undef -> undef
-    return UndefValue::get(ITy);
   
   // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
   // addresses never equal each other!  We already know that Op0 != Op1.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index f5f10c8961b3..e27da9669247 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -15,6 +15,7 @@
 
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/STLExtras.h"
@@ -137,6 +138,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   if (IP != BlockBegin) {
     --IP;
     for (; ScanLimit; --IP, --ScanLimit) {
+      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+      // generated code.
+      if (isa<DbgInfoIntrinsic>(IP))
+        ScanLimit++;
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
           IP->getOperand(1) == RHS)
         return IP;
@@ -505,6 +510,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     if (IP != BlockBegin) {
       --IP;
       for (; ScanLimit; --IP, --ScanLimit) {
+        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+        // generated code.
+        if (isa<DbgInfoIntrinsic>(IP))
+          ScanLimit++;
         if (IP->getOpcode() == Instruction::GetElementPtr &&
             IP->getOperand(0) == V && IP->getOperand(1) == Idx)
           return IP;
@@ -1258,8 +1267,19 @@ Value *SCEVExpander::expand(const SCEV *S) {
        L = L->getParentLoop())
     if (S->isLoopInvariant(L)) {
       if (!L) break;
-      if (BasicBlock *Preheader = L->getLoopPreheader())
+      if (BasicBlock *Preheader = L->getLoopPreheader()) {
         InsertPt = Preheader->getTerminator();
+        BasicBlock::iterator IP = InsertPt;
+        // Back past any debug info instructions.  Sometimes we inserted
+        // something earlier before debug info but after any real instructions.
+        // This should behave the same as if debug info was not present.
+        while (IP != Preheader->begin()) {
+          --IP;
+          if (!isa<DbgInfoIntrinsic>(IP))
+            break;
+          InsertPt = IP;
+        }
+      }
     } else {
       // If the SCEV is computable at this level, insert it into the header
       // after the PHIs (and after any other instructions that we've inserted
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 09344a32b860..92cbb7c95c03 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include <cstring>
 using namespace llvm;
 
@@ -1436,3 +1437,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
   // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
   return true;
 }
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - Return 'len+1' for the string V points to, 0 if the
+/// length is unknown, or ~0ULL if V only leads back to already-visited PHIs.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+  // Look through noop bitcast instructions.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+  // If this is a PHI node, there are two cases: either we have already seen it
+  // or we haven't.
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (!PHIs.insert(PN))
+      return ~0ULL;  // already in the set.
+
+    // If it was new, see if all the input strings are the same length.
+    uint64_t LenSoFar = ~0ULL;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+      if (Len == 0) return 0; // Unknown length -> unknown.
+
+      if (Len == ~0ULL) continue;
+
+      if (Len != LenSoFar && LenSoFar != ~0ULL)
+        return 0;    // Disagree -> unknown.
+      LenSoFar = Len;
+    }
+
+    // Success, all agree.
+    return LenSoFar;
+  }
+
+  // strlen(select(c,x,y)) is known only when strlen(x) and strlen(y) agree.
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+    if (Len1 == 0) return 0;
+    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+    if (Len2 == 0) return 0;
+    if (Len1 == ~0ULL) return Len2;
+    if (Len2 == ~0ULL) return Len1;
+    if (Len1 != Len2) return 0;
+    return Len1;
+  }
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return unknown.
+  User *GEP = 0;
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return 0;
+    GEP = CE;
+  } else {
+    return 0;
+  }
+
+  // Make sure the GEP has exactly three arguments.
+  if (GEP->getNumOperands() != 3)
+    return 0;
+
+  // Check to make sure that the first index of the GEP is an integer and
+  // has value 0 so that we are sure we're indexing into the initializer.
+  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+    if (!Idx->isZero())
+      return 0;
+  } else
+    return 0;
+
+  // If the second index isn't a ConstantInt, then this is a variable index
+  // into the array.  If this occurs, we can't say anything meaningful about
+  // the string.
+  uint64_t StartIdx = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    StartIdx = CI->getZExtValue();
+  else
+    return 0;
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized. The referenced constant
+  // initializer is the array that we'll use for optimization.
+  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
+    return 0;
+  Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case, which is a degenerate case. The
+  // initializer is constant zero so the length of the string must be zero.
+  if (isa<ConstantAggregateZero>(GlobalInit))
+    return 1;  // Len = 0 offset by 1.
+
+  // Must be a Constant Array
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+  // Get the number of elements in the array
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  // Traverse the constant array from StartIdx (derived above).  A StartIdx at
+  // or past the end never enters the loop and is reported as unknown.
+  for (uint64_t i = StartIdx; i < NumElts; ++i) {
+    Constant *Elt = Array->getOperand((unsigned)i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return 0;
+    if (CI->isZero())
+      return i-StartIdx+1; // We found end of string, success!
+  }
+
+  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - Public entry point: returns strlen(V)+1 when the length
+/// of the string V points to can be determined, and 0 when it cannot.
+uint64_t llvm::GetStringLength(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;
+
+  SmallPtrSet<PHINode*, 32> VisitedPHIs;
+  const uint64_t Result = GetStringLengthH(V, VisitedPHIs);
+  // A result of ~0ULL means only a cycle of PHIs was found, i.e. dead code;
+  // report an empty string (length 0, encoded as 1).
+  return Result != ~0ULL ? Result : 1;
+}
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 7ed651b77e2e..428842246331 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -27,20 +27,14 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
   return 0;
 }
 
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4)
-#include <ext/stdio_filebuf.h>
-
-int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
-  raw_fd_ostream OS(FileHandle, false);
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+                         int Unbuffered) {
+  raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
   
   WriteBitcodeToFile(unwrap(M), OS);
   return 0;
 }
 
-#else
-
 int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
-  return -1; // Not supported.
+  return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
 }
-
-#endif
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index faf4d9515af4..d94729ad7a89 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -334,7 +334,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   unsigned TailLen = 0;
   while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
     --I1; --I2;
-    if (!I1->isIdenticalTo(I2) ||
+    // Don't merge debugging pseudos.
+    if (I1->isDebugValue() || I2->isDebugValue() ||
+        !I1->isIdenticalTo(I2) ||
         // FIXME: This check is dubious. It's used to get around a problem where
         // people incorrectly expect inline asm directives to remain in the same
         // relative order. This is untenable because normal compiler
@@ -412,6 +414,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
                                 MachineBasicBlock::iterator E) {
   unsigned Time = 0;
   for (; I != E; ++I) {
+    if (I->isDebugValue())
+      continue;
     const TargetInstrDesc &TID = I->getDesc();
     if (TID.isCall())
       Time += 10;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 056e2d5b01e9..7d3de89ada2a 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -119,6 +119,8 @@ void CriticalAntiDepBreaker::FinishBlock() {
 
 void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
                                      unsigned InsertPosIndex) {
+  if (MI->isDebugValue())
+    return;
   assert(Count < InsertPosIndex && "Instruction index out of expected range!");
 
   // Any register which was defined within the previous scheduling region
@@ -409,6 +411,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
   for (MachineBasicBlock::iterator I = End, E = Begin;
        I != E; --Count) {
     MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
 
     // Check if this instruction has a dependence on the critical path that
     // is an anti-dependence that we may be able to break. If it is, set
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index fd442db27a27..5e888650bae4 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -67,7 +67,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
-static cl::opt<bool> EnableMachineCSE("machine-cse", cl::Hidden,
+static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden,
     cl::desc("Enable Machine CSE"));
 
 static cl::opt<cl::boolOrDefault>
@@ -212,6 +212,12 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
   return false; // success!
 }
 
+/// printNoVerify - If PrintMachineCode is set, add a printer pass for Banner.
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
+  if (!PrintMachineCode) return;
+  PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
 static void printAndVerify(PassManagerBase &PM,
                            const char *Banner,
                            bool allowDoubleDefs = false) {
@@ -320,10 +326,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   if (OptLevel != CodeGenOpt::None) {
     PM.add(createOptimizeExtsPass());
-    if (EnableMachineCSE)
-      PM.add(createMachineCSEPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
+    if (EnableMachineCSE)
+      PM.add(createMachineCSEPass());
     if (!DisableMachineSink)
       PM.add(createMachineSinkingPass());
     printAndVerify(PM, "After MachineLICM and MachineSinking",
@@ -378,13 +384,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   // Branch folding must be run after regalloc and prolog/epilog insertion.
   if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
     PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
-    printAndVerify(PM, "After BranchFolding");
+    printNoVerify(PM, "After BranchFolding");
   }
 
   // Tail duplication.
   if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
     PM.add(createTailDuplicatePass(false));
-    printAndVerify(PM, "After TailDuplicate");
+    printNoVerify(PM, "After TailDuplicate");
   }
 
   PM.add(createGCMachineCodeAnalysisPass());
@@ -394,11 +400,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
     PM.add(createCodePlacementOptPass());
-    printAndVerify(PM, "After CodePlacementOpt");
+    printNoVerify(PM, "After CodePlacementOpt");
   }
 
   if (addPreEmitPass(PM, OptLevel))
-    printAndVerify(PM, "After PreEmit passes");
+    printNoVerify(PM, "After PreEmit passes");
 
   return false;
 }
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 68c85394d4cd..519990e04a2c 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -365,27 +365,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
     }
   }
 
-  if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
-    if (LastPartDef)
-      // The last partial def kills the register.
-      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
-                                                true/*IsImp*/, true/*IsKill*/));
-    else {
-      MachineOperand *MO =
-        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
-      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
-      // If the last reference is the last def, then it's not used at all.
-      // That is, unless we are currently processing the last reference itself.
-      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
-      if (NeedEC) {
-        // If we are adding a subreg def and the superreg def is marked early
-        // clobber, add an early clobber marker to the subreg def.
-        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
-        if (MO)
-          MO->setIsEarlyClobber();
-      }
-    }
-  } else if (!PhysRegUse[Reg]) {
+  if (!PhysRegUse[Reg]) {
     // Partial uses. Mark register def dead and add implicit def of
     // sub-registers which are used.
     // EAX<dead>  = op  AL<imp-def>
@@ -419,6 +399,26 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
       for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
         PartUses.erase(*SS);
     }
+  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+    if (LastPartDef)
+      // The last partial def kills the register.
+      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+                                                true/*IsImp*/, true/*IsKill*/));
+    else {
+      MachineOperand *MO =
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+      // If the last reference is the last def, then it's not used at all.
+      // That is, unless we are currently processing the last reference itself.
+      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+      if (NeedEC) {
+        // If we are adding a subreg def and the superreg def is marked early
+        // clobber, add an early clobber marker to the subreg def.
+        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+        if (MO)
+          MO->setIsEarlyClobber();
+      }
+    }
   } else
     LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
   return true;
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 023ace2d219c..b376e3d05fee 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/Statistic.h"
@@ -25,76 +26,16 @@
 
 using namespace llvm;
 
-namespace llvm {
-  template<> struct DenseMapInfo<MachineInstr*> {
-    static inline MachineInstr *getEmptyKey() {
-      return 0;
-    }
-
-    static inline MachineInstr *getTombstoneKey() {
-      return reinterpret_cast<MachineInstr*>(-1);
-    }
-
-    static unsigned getHashValue(const MachineInstr* const &MI) {
-      unsigned Hash = MI->getOpcode() * 37;
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-        const MachineOperand &MO = MI->getOperand(i);
-        uint64_t Key = (uint64_t)MO.getType() << 32;
-        switch (MO.getType()) {
-        default: break;
-        case MachineOperand::MO_Register:
-          if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-            continue;  // Skip virtual register defs.
-          Key |= MO.getReg();
-          break;
-        case MachineOperand::MO_Immediate:
-          Key |= MO.getImm();
-          break;
-        case MachineOperand::MO_FrameIndex:
-        case MachineOperand::MO_ConstantPoolIndex:
-        case MachineOperand::MO_JumpTableIndex:
-          Key |= MO.getIndex();
-          break;
-        case MachineOperand::MO_MachineBasicBlock:
-          Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
-          break;
-        case MachineOperand::MO_GlobalAddress:
-          Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
-          break;
-        case MachineOperand::MO_BlockAddress:
-          Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
-          break;
-        }
-        Key += ~(Key << 32);
-        Key ^= (Key >> 22);
-        Key += ~(Key << 13);
-        Key ^= (Key >> 8);
-        Key += (Key << 3);
-        Key ^= (Key >> 15);
-        Key += ~(Key << 27);
-        Key ^= (Key >> 31);
-        Hash = (unsigned)Key + Hash * 37;
-      }
-      return Hash;
-    }
-
-    static bool isEqual(const MachineInstr* const &LHS,
-                        const MachineInstr* const &RHS) {
-      if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
-          LHS == getEmptyKey() || LHS == getTombstoneKey())
-        return LHS == RHS;
-      return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs);
-    }
-  };
-} // end llvm namespace
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
 
 namespace {
   class MachineCSE : public MachineFunctionPass {
     const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
     MachineRegisterInfo  *MRI;
     MachineDominatorTree *DT;
-    ScopedHashTable<MachineInstr*, unsigned> VNT;
-    unsigned CurrVN;
+    AliasAnalysis *AA;
   public:
     static char ID; // Pass identification
     MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
@@ -104,12 +45,22 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AliasAnalysis>();
       AU.addRequired<MachineDominatorTree>();
       AU.addPreserved<MachineDominatorTree>();
     }
 
   private:
+    unsigned CurrVN;
+    ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+    SmallVector<MachineInstr*, 64> Exps;
+
     bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool isPhysDefTriviallyDead(unsigned Reg,
+                                MachineBasicBlock::const_iterator I,
+                                MachineBasicBlock::const_iterator E);
+    bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool isCSECandidate(MachineInstr *MI);
     bool ProcessBlock(MachineDomTreeNode *Node);
   };
 } // end anonymous namespace
@@ -125,27 +76,65 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
   bool Changed = false;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isUse()) {
-      unsigned Reg = MO.getReg();
-      if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
-        continue;
-      MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      if (DefMI->getParent() == MBB) {
-        unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-        if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
-            TargetRegisterInfo::isVirtualRegister(SrcReg) &&
-            !SrcSubIdx && !DstSubIdx) {
-          MO.setReg(SrcReg);
-          Changed = true;
-        }
-      }
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (!MRI->hasOneUse(Reg))
+      // Only coalesce single use copies. This ensures the copy will be
+      // deleted.
+      continue;
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    if (DefMI->getParent() != MBB)
+      continue;
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+        !SrcSubIdx && !DstSubIdx) {
+      MO.setReg(SrcReg);
+      DefMI->eraseFromParent();
+      ++NumCoalesces;
+      Changed = true;
     }
   }
 
   return Changed;
 }
 
-static bool hasLivePhysRegDefUse(MachineInstr *MI) {
+/// Return true if a short scan from I hits E or a redef of Reg before any use.
+bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+                                        MachineBasicBlock::const_iterator I,
+                                        MachineBasicBlock::const_iterator E) {
+  unsigned LookAheadLeft = 5;
+  while (LookAheadLeft--) {
+    // Skip dbg_value's so they neither stall the scan nor use up lookahead.
+    while (I != E && I->isDebugValue())
+      ++I;
+    if (I == E)
+      // Reached end of block, register is obviously dead.
+      return true;
+    bool SeenDef = false;
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (!TRI->regsOverlap(MO.getReg(), Reg))
+        continue;
+      if (MO.isUse())
+        return false;
+      SeenDef = true;
+    }
+    if (SeenDef)
+      // A def of Reg (or an alias) seen before any use: trivially dead.
+      return true;
+    ++I;
+  }
+  return false;
+}
+
+bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){
+  unsigned PhysDef = 0;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg())
@@ -153,30 +142,69 @@ static bool hasLivePhysRegDefUse(MachineInstr *MI) {
     unsigned Reg = MO.getReg();
     if (!Reg)
       continue;
-    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
-        !(MO.isDef() && MO.isDead()))
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse())
+        // Can't touch anything to read a physical register.
+        return true;
+      if (MO.isDead())
+        // If the def is dead, it's ok.
+        continue;
+      // Ok, this is a physical register def that's not marked "dead". That's
+      // common since this pass is run before livevariables. We can scan
+      // forward a few instructions and check if it is obviously dead.
+      if (PhysDef)
+        // Multiple physical register defs. These are rare, forget about it.
+        return true;
+      PhysDef = Reg;
+    }
+  }
+
+  if (PhysDef) {
+    MachineBasicBlock::iterator I = MI; I = llvm::next(I);
+    if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
       return true;
   }
   return false;
 }
 
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+  // Copies and the sub-register pseudo instructions are never candidates.
+  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+  const bool IsCopyLike =
+    TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
+    MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+  if (IsCopyLike)
+    return false;
+
+  // Reject anything that may store, is a call or a terminator, or has side
+  // effects the instruction description does not model.
+  const TargetInstrDesc &Desc = MI->getDesc();
+  const bool HasSideEffects =
+    Desc.mayStore() || Desc.isCall() || Desc.isTerminator() ||
+    Desc.hasUnmodeledSideEffects();
+  if (HasSideEffects)
+    return false;
+
+  // A load qualifies only when isInvariantLoad() accepts it, i.e. the target
+  // reports the loaded value as effectively constant.
+  // FIXME: loads with no other side effects could also be accepted when no
+  // other instruction can change the memory they read (trivial alias analysis).
+  if (Desc.mayLoad() && !MI->isInvariantLoad(AA))
+    return false;
+  return true;
+}
+
 bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
   bool Changed = false;
 
-  ScopedHashTableScope<MachineInstr*, unsigned> VNTS(VNT);
+  ScopedHashTableScope<MachineInstr*, unsigned,
+    MachineInstrExpressionTrait> VNTS(VNT);
   MachineBasicBlock *MBB = Node->getBlock();
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-       ++I) {
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
     MachineInstr *MI = &*I;
-    bool SawStore = false;
-    if (!MI->isSafeToMove(TII, 0, SawStore))
-      continue;
-    // Ignore copies or instructions that read / write physical registers
-    // (except for dead defs of physical registers).
-    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-    if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
-      continue;
-    if (hasLivePhysRegDefUse(MI))
+    ++I;
+
+    if (!isCSECandidate(MI))
       continue;
 
     bool FoundCSE = VNT.count(MI);
@@ -185,11 +213,41 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
       if (PerformTrivialCoalescing(MI, MBB))
         FoundCSE = VNT.count(MI);
     }
+    // FIXME: commute commutable instructions?
+
+    // If the instruction defines a physical register and the value *may* be
+    // used, then it's not safe to replace it with a common subexpression.
+    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB))
+      FoundCSE = false;
+
+    if (!FoundCSE) {
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+      continue;
+    }
 
-    if (FoundCSE)
-      DEBUG(dbgs() << "Found a common subexpression: " << *MI);
-    else
-      VNT.insert(MI, ++CurrVN);
+    // Found a common subexpression, eliminate it.
+    unsigned CSVN = VNT.lookup(MI);
+    MachineInstr *CSMI = Exps[CSVN];
+    DEBUG(dbgs() << "Examining: " << *MI);
+    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+    unsigned NumDefs = MI->getDesc().getNumDefs();
+    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned OldReg = MO.getReg();
+      unsigned NewReg = CSMI->getOperand(i).getReg();
+      if (OldReg == NewReg)
+        continue;
+      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+             TargetRegisterInfo::isVirtualRegister(NewReg) &&
+             "Do not CSE physical register defs!");
+      MRI->replaceRegWith(OldReg, NewReg);
+      --NumDefs;
+    }
+    MI->eraseFromParent();
+    ++NumCSEs;
   }
 
   // Recursively call ProcessBlock with childred.
@@ -202,7 +260,9 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
 
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
   DT = &getAnalysis<MachineDominatorTree>();
+  AA = &getAnalysis<AliasAnalysis>();
   return ProcessBlock(DT->getRootNode());
 }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index cba93f14a0bb..e23670d1d1d2 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -704,24 +704,31 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
 
 bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
                                  MICheckType Check) const {
-    if (Other->getOpcode() != getOpcode() ||
-        Other->getNumOperands() != getNumOperands())
+  // If opcodes or number of operands are not the same then the two
+  // instructions are obviously not identical.
+  if (Other->getOpcode() != getOpcode() ||
+      Other->getNumOperands() != getNumOperands())
+    return false;
+
+  // Check operands to make sure they match.
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    const MachineOperand &OMO = Other->getOperand(i);
+    // Clients may or may not want to ignore defs when testing for equality.
+    // For example, machine CSE pass only cares about finding common
+    // subexpressions, so it's safe to ignore virtual register defs.
+    if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+      if (Check == IgnoreDefs)
+        continue;
+      // Check == IgnoreVRegDefs
+      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+        if (MO.getReg() != OMO.getReg())
+          return false;
+    } else if (!MO.isIdenticalTo(OMO))
       return false;
-    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = getOperand(i);
-      const MachineOperand &OMO = Other->getOperand(i);
-      if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
-        if (Check == IgnoreDefs)
-          continue;
-        // Check == IgnoreVRegDefs
-        if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
-            TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
-          if (MO.getReg() != OMO.getReg())
-            return false;
-      } else if (!MO.isIdenticalTo(OMO))
-        return false;
-    }
-    return true;
+  }
+  return true;
 }
 
 /// removeFromParent - This method unlinks 'this' from the containing basic
@@ -1348,3 +1355,48 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
                                          true  /*IsDef*/,
                                          true  /*IsImp*/));
 }
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+  unsigned Result = MI->getOpcode() * 37;  // Seed with the opcode.
+  for (unsigned Idx = 0, End = MI->getNumOperands(); Idx != End; ++Idx) {
+    const MachineOperand &Op = MI->getOperand(Idx);
+    uint64_t Word = (uint64_t)Op.getType() << 32;  // Kind in the high word.
+    switch (Op.getType()) {
+    case MachineOperand::MO_Register:
+      if (Op.isDef() && Op.getReg() &&
+          TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+        continue;  // Virtual register defs do not take part in the hash.
+      Word |= Op.getReg();
+      break;
+    case MachineOperand::MO_Immediate:
+      Word |= Op.getImm();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      Word |= Op.getIndex();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      Word |= DenseMapInfo<void*>::getHashValue(Op.getMBB());
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      Word |= DenseMapInfo<void*>::getHashValue(Op.getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Word |= DenseMapInfo<void*>::getHashValue(Op.getBlockAddress());
+      break;
+    default: break;  // Other operand kinds contribute only their type.
+    }
+    Word += ~(Word << 32);  // Mix the 64-bit word before folding it in.
+    Word ^= (Word >> 22);
+    Word += ~(Word << 13);
+    Word ^= (Word >> 8);
+    Word += (Word << 3);
+    Word ^= (Word >> 15);
+    Word += ~(Word << 27);
+    Word ^= (Word >> 31);
+    Result = (unsigned)Word + Result * 37;
+  }
+  return Result;
+}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index b31973e04fd9..d9ab6773a53a 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -116,6 +116,19 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
   return 0;
 }
 
+/// hasOneUse - Return true if the use list of RegNo has exactly one entry.
+bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
+  use_iterator UI = use_begin(RegNo);
+  // Non-empty use list whose second position is already the end.
+  return UI != use_end() && ++UI == use_end();
+}
+
+/// hasOneNonDBGUse - Like hasOneUse, but walks the use_nodbg iterator range.
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+  use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+  // Non-empty range whose second position is already the end.
+  return UI != use_nodbg_end() && ++UI == use_nodbg_end();
+}
 
 #ifndef NDEBUG
 void MachineRegisterInfo::dumpUses(unsigned Reg) const {
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 9ba7d1486e0a..e47ba7c2cc37 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -72,8 +72,13 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
                                              MachineBasicBlock *MBB) const {
   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
          "Only makes sense for vregs");
-  for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg),
-       E = RegInfo->use_end(); I != E; ++I) {
+  // Ignoring debug uses is necessary so debug info doesn't affect the code.
+  // This may leave a referencing dbg_value in the original block, before
+  // the definition of the vreg.  Dwarf generator handles this although the
+  // user might not get the right info at runtime.
+  for (MachineRegisterInfo::use_nodbg_iterator I = 
+       RegInfo->use_nodbg_begin(Reg),
+       E = RegInfo->use_nodbg_end(); I != E; ++I) {
     // Determine the block of the use.
     MachineInstr *UseInst = &*I;
     MachineBasicBlock *UseBlock = UseInst->getParent();
@@ -135,7 +140,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     ProcessedBegin = I == MBB.begin();
     if (!ProcessedBegin)
       --I;
-    
+
+    if (MI->isDebugValue())
+      continue;
+
     if (SinkInstruction(MI, SawStore))
       ++NumSunk, MadeChange = true;
     
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index bdfd448acdb2..8bbe0a725286 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -443,34 +443,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
 
   return NMBB;
 }
-
-unsigned
-PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) {
-  if (!MI || MI==getEmptyKey() || MI==getTombstoneKey())
-    return DenseMapInfo<MachineInstr*>::getHashValue(MI);
-  unsigned hash = 0;
-  for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2)
-    hash = hash*37 + DenseMapInfo<BBVRegPair>::
-      getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(),
-                              MI->getOperand(ni).getReg()));
-  return hash;
-}
-
-bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS,
-                                            const MachineInstr *RHS) {
-  const MachineInstr *EmptyKey = getEmptyKey();
-  const MachineInstr *TombstoneKey = getTombstoneKey();
-  if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey ||
-      LHS==TombstoneKey || RHS==TombstoneKey)
-    return LHS==RHS;
-
-  unsigned ne = LHS->getNumOperands();
-  if (ne != RHS->getNumOperands())
-      return false;
-  // Ignore operand 0, the defined register.
-  for (unsigned ni = 1; ni != ne; ni += 2)
-    if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() ||
-        LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB())
-      return false;
-  return true;
-}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index ff4aa2091854..7dedf0318a8a 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -102,15 +102,9 @@ namespace llvm {
     // Defs of PHI sources which are implicit_def.
     SmallPtrSet<MachineInstr*, 4> ImpDefs;
 
-    // Lowered PHI nodes may be reused. We provide special DenseMap traits to
-    // match PHI nodes with identical arguments.
-    struct PHINodeTraits : public DenseMapInfo<MachineInstr*> {
-      static unsigned getHashValue(const MachineInstr *PtrVal);
-      static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS);
-    };
-
     // Map reusable lowered PHI node -> incoming join register.
-    typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap;
+    typedef DenseMap<MachineInstr*, unsigned,
+                     MachineInstrExpressionTrait> LoweredPHIMap;
     LoweredPHIMap LoweredPHIs;
   };
 
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f43395fa2efc..424181c02549 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -460,6 +460,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
   for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
        I != E; --Count) {
     MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
 
     // Update liveness.  Registers that are defed but not used in this
     // instruction are now dead. Mark register and all subregs as they
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 7fb3e6e6d2d2..5e86e5a9447e 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,19 +18,38 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
 #include <map>
 using namespace llvm;
 
-static ManagedStatic<PseudoSourceValue[4]> PSVs;
+namespace {
+/// PSVGlobalsTy - All shared PseudoSourceValue state, held in PSVGlobals.
+struct PSVGlobalsTy {
+  typedef std::map<int, const PseudoSourceValue *> FSValueMap;
+  const PseudoSourceValue PSVs[4];  // Immutable, so no locking is needed.
+  sys::Mutex Lock;                  // Guards FSValues, but not its pointees.
+  FSValueMap FSValues;
+
+  PSVGlobalsTy() : PSVs() {}
+  ~PSVGlobalsTy() {
+    // Release every heap-allocated value stored in the map.
+    for (FSValueMap::iterator I = FSValues.begin(); I != FSValues.end(); ++I)
+      delete I->second;
+  }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+}  // anonymous namespace
 
 const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &(*PSVs)[0]; }
+{ return &PSVGlobals->PSVs[0]; }
 const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &(*PSVs)[1]; }
+{ return &PSVGlobals->PSVs[1]; }
 const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &(*PSVs)[2]; }
+{ return &PSVGlobals->PSVs[2]; }
 const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &(*PSVs)[3]; }
+{ return &PSVGlobals->PSVs[3]; }
 
 static const char *const PSVNames[] = {
   "Stack",
@@ -48,13 +67,13 @@ PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
         Subclass) {}
 
 void PseudoSourceValue::printCustom(raw_ostream &O) const {
-  O << PSVNames[this - *PSVs];
+  O << PSVNames[this - PSVGlobals->PSVs];
 }
 
-static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
-
 const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
-  const PseudoSourceValue *&V = (*FSValues)[FI];
+  PSVGlobalsTy &PG = *PSVGlobals;
+  sys::ScopedLock locked(PG.Lock);
+  const PseudoSourceValue *&V = PG.FSValues[FI];
   if (!V)
     V = new FixedStackPseudoSourceValue(FI);
   return V;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e4ff44d2a2ec..3be6b431167a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1758,7 +1758,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N1.getValueType();
-  unsigned BitWidth = VT.getSizeInBits();
+  unsigned BitWidth = VT.getScalarType().getSizeInBits();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -1872,9 +1872,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
@@ -1895,9 +1895,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - MemVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 02fe85dd996f..625de1143296 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,6 +15,7 @@
 
 #define DEBUG_TYPE "instr-emitter"
 #include "InstrEmitter.h"
+#include "SDDbgValue.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -497,6 +498,56 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
   assert(isNew && "Node emitted out of order - early");
 }
 
+/// EmitDbgValue - Emit the DBG_VALUE that describes Node, if it has one.
+/// Constant dbg_values are emitted by the single-argument overload instead.
+void
+InstrEmitter::EmitDbgValue(SDNode *Node,
+                           DenseMap<SDValue, unsigned> &VRBaseMap,
+                           SDDbgValue *sd) {
+  // Nothing to do unless the node reports a debug value and sd was supplied.
+  if (!Node->getHasDebugValue() || !sd)
+    return;
+
+  // Ask getVR for the register of the result that sd refers to.
+  SDValue Described(sd->getSDNode(), sd->getResNo());
+  unsigned VReg = getVR(Described, VRBaseMap);
+
+  MachineInstrBuilder MIB =
+    BuildMI(*MF, sd->getDebugLoc(), TII->get(TargetOpcode::DBG_VALUE));
+  if (VReg)
+    MIB.addReg(VReg, RegState::Debug);
+  else
+    MIB.addReg(0U);   // Insert an Undef so we can see what we dropped.
+  MIB.addImm(sd->getOffset()).addMetadata(sd->getMDPtr());
+
+  MachineInstr *MI = MIB;
+  MBB->insert(InsertPos, MI);
+}
+
+/// EmitDbgValue - Generate constant debug info.  No SDNode is involved.
+/// Integer constants wider than 64 bits are emitted as Undef, not asserted on.
+void
+InstrEmitter::EmitDbgValue(SDDbgValue *sd) {
+  if (!sd)
+    return;
+  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  MachineInstrBuilder MIB = BuildMI(*MF, sd->getDebugLoc(), II);
+  Value *V = sd->getConst();
+  ConstantInt *CI = dyn_cast<ConstantInt>(V);
+  if (CI && CI->getValue().getActiveBits() <= 64) {
+    // getZExtValue() is only valid when the value fits in a uint64_t.
+    MIB.addImm(CI->getZExtValue());
+  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    MIB.addFPImm(CF);
+  } else {
+    // Insert an Undef so we can see what we dropped (incl. too-wide ints).
+    MIB.addReg(0U);
+  }
+  MIB.addImm(sd->getOffset()).addMetadata(sd->getMDPtr());
+  MachineInstr *MI = MIB;
+  MBB->insert(InsertPos, MI);
+}
+
 /// EmitNode - Generate machine code for a node and needed dependencies.
 ///
 void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 91817e4d38a4..4fe9f19cc908 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -23,6 +23,7 @@
 namespace llvm {
 
 class TargetInstrDesc;
+class SDDbgValue;
 
 class InstrEmitter {
   MachineFunction *MF;
@@ -97,6 +98,16 @@ public:
   /// MachineInstr.
   static unsigned CountOperands(SDNode *Node);
 
+  /// EmitDbgValue - Generate any debug info that refers to this Node.  Constant
+  /// dbg_value is not handled here.
+  void EmitDbgValue(SDNode *Node,
+                    DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SDDbgValue* sd);
+
+
+  /// EmitDbgValue - Generate a constant DBG_VALUE.  No node is involved.
+  void EmitDbgValue(SDDbgValue* sd);
+
   /// EmitNode - Generate machine code for a node and needed dependencies.
   ///
   void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c7ab34f4e2ef..f498263b735f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2008,6 +2008,31 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     return Result;
   }
   assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+  // Implementation of unsigned i64 to f64 following the algorithm in
+  // __floatundidf in compiler_rt. This implementation has the advantage
+  // of performing rounding correctly, both in the default rounding mode
+  // and in all alternate rounding modes.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+    SDValue TwoP52 =
+      DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+    SDValue TwoP84PlusTwoP52 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+    SDValue TwoP84 =
+      DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+    SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+    SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+                             DAG.getConstant(32, MVT::i64));
+    SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+    SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+    SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
+    SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52);
+    return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+  }
+
   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h
new file mode 100644
index 000000000000..9e15fc98bc4d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h
@@ -0,0 +1,67 @@
+//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDDBGVALUE_H
+#define LLVM_CODEGEN_SDDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Either Const or Node is nonzero, but not both.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+  SDNode *Node;           // valid for non-constants
+  unsigned ResNo;         // valid for non-constants
+  Value *Const;           // valid for constants
+  MDNode *mdPtr;          // metadata node, returned by getMDPtr()
+  uint64_t Offset;        // offset, returned by getOffset()
+  DebugLoc DL;            // debug location, returned by getDebugLoc()
+public:
+  // Constructor for non-constants: refers to result R of node N.
+  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) :
+    Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {}
+
+  // Constructor for constants: Node and ResNo are zeroed.
+  SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0),
+    ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {}
+
+  // Returns the MDNode pointer.
+  MDNode *getMDPtr() const { return mdPtr; }
+
+  // Returns the SDNode* (valid for non-constants only; asserts otherwise).
+  SDNode *getSDNode() const { assert (!Const); return Node; }
+
+  // Returns the ResNo (valid for non-constants only; asserts otherwise).
+  unsigned getResNo() const { assert (!Const); return ResNo; }
+
+  // Returns the Value* for a constant (asserts if a node was supplied).
+  Value *getConst() const { assert (!Node); return Const; }
+
+  // Returns the offset.
+  uint64_t getOffset() const { return Offset; }
+
+  // Returns the DebugLoc.
+  DebugLoc getDebugLoc() const { return DL; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 002bc682c489..023e486db311 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4869,6 +4869,43 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
   return NULL;
 }
 
+namespace {
+
+/// RAUWUpdateListener - Listener installed by the ReplaceAllUsesWith family
+/// while it walks a use list.  If a node being deleted is the one the
+/// caller's use iterator currently refers to, the iterator is stepped past it
+/// so that it never dangles.
+///
+/// Every notification is also relayed to an optional "downlink" listener,
+/// i.e. the one supplied by ReplaceAllUsesWith's own caller.
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+  SelectionDAG::DAGUpdateListener *DownLink;  // May be null.
+  SDNode::use_iterator &UI;                   // Caller's current position.
+  SDNode::use_iterator &UE;                   // Caller's end position.
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    // Step over every consecutive use whose user is the dying node.
+    for (; UI != UE && *UI == N; ++UI)
+      ;
+
+    // Relay the notification.
+    if (DownLink)
+      DownLink->NodeDeleted(N, E);
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Nothing to fix up locally; just relay the notification.
+    if (DownLink) DownLink->NodeUpdated(N);
+  }
+
+public:
+  RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+                     SDNode::use_iterator &ui, SDNode::use_iterator &ue)
+    : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+} // end anonymous namespace
+
 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
 /// This can cause recursive merging of nodes in the DAG.
 ///
@@ -4889,6 +4926,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
   // is replaced by To, we don't want to replace of all its users with To
   // too. See PR3018 for more info.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -4907,7 +4945,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -4933,6 +4971,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -4951,7 +4990,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -4969,6 +5008,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
 
@@ -4988,7 +5028,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
@@ -5010,6 +5050,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From.getNode()->use_begin(),
                        UE = From.getNode()->use_end();
+  RAUWUpdateListener Listener(UpdateListener, UI, UE);
   while (UI != UE) {
     SDNode *User = *UI;
     bool UserRemovedFromCSEMaps = false;
@@ -5045,7 +5086,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
 
     // Now that we have modified User, add it back to the CSE maps.  If it
     // already exists there, recursively merge the results together.
-    AddModifiedNodeToCSEMaps(User, UpdateListener);
+    AddModifiedNodeToCSEMaps(User, &Listener);
   }
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2e2020d6183f..05f9f1ff1860 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -56,9 +56,12 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
 #include <algorithm>
 using namespace llvm;
 
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+
 static cl::opt<bool>
 EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
           cl::desc("Enable verbose messages in the \"fast\" "
@@ -930,6 +933,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
         // feed PHI nodes in successor blocks.
         if (isa<TerminatorInst>(BI))
           if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+            ++NumFastIselFailures;
             ResetDebugLoc(SDB, FastIS);
             if (EnableFastISelVerbose || EnableFastISelAbort) {
               dbgs() << "FastISel miss: ";
@@ -954,6 +958,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(BI)) {
+          ++NumFastIselFailures;
           if (EnableFastISelVerbose || EnableFastISelAbort) {
             dbgs() << "FastISel missed call: ";
             BI->dump();
@@ -983,6 +988,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
         // Otherwise, give up on FastISel for the rest of the block.
         // For now, be a little lenient about non-branch terminators.
         if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+          ++NumFastIselFailures;
           if (EnableFastISelVerbose || EnableFastISelAbort) {
             dbgs() << "FastISel miss: ";
             BI->dump();
@@ -1032,6 +1038,8 @@ SelectionDAGISel::FinishBasicBlock() {
       MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
+      if (!BB->isSuccessor(PHI->getParent()))
+        continue;
       PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
                                                 false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
@@ -1414,21 +1422,6 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
   return false;
 }
 
-/// isNonImmUse - Start searching from Root up the DAG to check is Def can
-/// be reached. Return true if that's the case. However, ignore direct uses
-/// by ImmedUse (which would be U in the example illustrated in
-/// IsLegalToFold) and by Root (which can happen in the store case).
-/// FIXME: to be really generic, we should allow direct use by any node
-/// that is being folded. But realisticly since we only fold loads which
-/// have one non-chain use, we only need to watch out for load/op/store
-/// and load/op/cmp case where the root (store / cmp) may reach the load via
-/// its chain operand.
-static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
-                               bool IgnoreChains) {
-  SmallPtrSet<SDNode*, 16> Visited;
-  return findNonImmUse(Root, Def, ImmedUse, Root, Visited, IgnoreChains);
-}
-
 /// IsProfitableToFold - Returns true if it's profitable to fold the specific
 /// operand node N of U during instruction selection that starts at Root.
 bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
@@ -1485,6 +1478,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
   // Fold. But since Fold and FU are flagged together, this will create
   // a cycle in the scheduling graph.
 
+  // If the node has flags, walk down the graph to the "lowest" node in the
+  // flagged set.
   EVT VT = Root->getValueType(Root->getNumValues()-1);
   while (VT == MVT::Flag) {
     SDNode *FU = findFlagUse(Root);
@@ -1492,9 +1487,17 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
       break;
     Root = FU;
     VT = Root->getValueType(Root->getNumValues()-1);
+    
+    // If our query node has a flag result with a use, we've walked up it.  If
+    // the user (which has already been selected) has a chain or indirectly uses
+    // the chain, our WalkChainUsers predicate will not consider it.  Because of
+    // this, we cannot ignore chains in this predicate.
+    IgnoreChains = false;
   }
+  
 
-  return !isNonImmUse(Root, N.getNode(), U, IgnoreChains);
+  SmallPtrSet<SDNode*, 16> Visited;
+  return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
 }
 
 SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2249,11 +2252,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                                 N.getNode()))
         break;
       continue;
-    case OPC_CheckComplexPat:
-      if (!CheckComplexPattern(NodeToMatch, N, 
-                               MatcherTable[MatcherIndex++], RecordedNodes))
+    case OPC_CheckComplexPat: {
+      unsigned CPNum = MatcherTable[MatcherIndex++];
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+                               RecordedNodes))
         break;
       continue;
+    }
     case OPC_CheckOpcode:
       if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
       continue;
@@ -2711,29 +2718,26 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
 
 
 void SelectionDAGISel::CannotYetSelect(SDNode *N) {
-  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
-      N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
-      N->getOpcode() == ISD::INTRINSIC_VOID)
-    return CannotYetSelectIntrinsic(N);
-  
   std::string msg;
   raw_string_ostream Msg(msg);
   Msg << "Cannot yet select: ";
-  N->printrFull(Msg, CurDAG);
+  
+  if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_VOID) {
+    N->printrFull(Msg, CurDAG);
+  } else {
+    bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+    unsigned iid =
+      cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+    if (iid < Intrinsic::num_intrinsics)
+      Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+    else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+      Msg << "target intrinsic %" << TII->getName(iid);
+    else
+      Msg << "unknown intrinsic #" << iid;
+  }
   llvm_report_error(Msg.str());
 }
 
-void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
-  dbgs() << "Cannot yet select: ";
-  unsigned iid =
-    cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
-                                       MVT::Other))->getZExtValue();
-  if (iid < Intrinsic::num_intrinsics)
-    llvm_report_error("Cannot yet select: intrinsic %" +
-                      Intrinsic::getName((Intrinsic::ID)iid));
-  else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
-    llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
-                      tii->getName(iid));
-}
-
 char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 8d4d1b21dd81..059e8d6c19aa 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -44,7 +44,6 @@ namespace {
     const Type *FunctionContextTy;
     Constant *RegisterFn;
     Constant *UnregisterFn;
-    Constant *ResumeFn;
     Constant *BuiltinSetjmpFn;
     Constant *FrameAddrFn;
     Constant *LSDAAddrFn;
@@ -67,8 +66,8 @@ namespace {
     }
 
   private:
-    void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
-                            Value *CallSite,
+    void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+    void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
                             SwitchInst *CatchSwitch);
     void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
     bool insertSjLjEHSupport(Function &F);
@@ -107,11 +106,6 @@ bool SjLjEHPass::doInitialization(Module &M) {
                           Type::getVoidTy(M.getContext()),
                           PointerType::getUnqual(FunctionContextTy),
                           (Type *)0);
-  ResumeFn =
-    M.getOrInsertFunction("_Unwind_SjLj_Resume",
-                          Type::getVoidTy(M.getContext()),
-                          VoidPtrTy,
-                          (Type *)0);
   FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
   BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
   LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
@@ -123,12 +117,22 @@ bool SjLjEHPass::doInitialization(Module &M) {
   return true;
 }
 
+/// insertCallSiteStore - Store the call-site number Number into the function
+/// context slot CallSite, inserting the store ahead of instruction I.
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+                                     Value *CallSite) {
+  // Materialize the call-site number as an i32 constant.
+  ConstantInt *SiteNumber =
+    ConstantInt::get(Type::getInt32Ty(I->getContext()), Number);
+  new StoreInst(SiteNumber, CallSite, /*isVolatile=*/true, I);
+}
+
 /// markInvokeCallSite - Insert code to mark the call_site for this invoke
-void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
                                     Value *CallSite,
                                     SwitchInst *CatchSwitch) {
   ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
-                                            InvokeNo);
+                                              InvokeNo);
   // The runtime comes back to the dispatcher with the call_site - 1 in
   // the context. Odd, but there it is.
   ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
@@ -145,8 +149,11 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
     }
   }
 
-  // Insert a store of the invoke num before the invoke
-  new StoreInst(CallSiteNoC, CallSite, true, II);  // volatile
+  // Insert the store of the call site value
+  insertCallSiteStore(II, InvokeNo, CallSite);
+
+  // Record the call site value for the back end so it stays associated with
+  // the invoke.
   CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
 
   // Add a switch case to our unwind block.
@@ -272,8 +279,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
   SmallVector<InvokeInst*,16> Invokes;
 
   // Look through the terminators of the basic blocks to find invokes, returns
-  // and unwinds
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+  // and unwinds.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
     if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
       // Remember all return instructions in case we insert an invoke into this
       // function.
@@ -283,6 +290,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
     } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
       Unwinds.push_back(UI);
     }
+  }
   // If we don't have any invokes or unwinds, there's nothing to do.
   if (Unwinds.empty() && Invokes.empty()) return false;
 
@@ -478,24 +486,21 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
     for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
       markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
 
-    // The front end has likely added calls to _Unwind_Resume. We need
-    // to find those calls and mark the call_site as -1 immediately prior.
-    // resume is a noreturn function, so any block that has a call to it
-    // should end in an 'unreachable' instruction with the call immediately
-    // prior. That's how we'll search.
-    // ??? There's got to be a better way. this is fugly.
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-      if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) {
-        BasicBlock::iterator I = BB->getTerminator();
-        // Check the previous instruction and see if it's a resume call
-        if (I == BB->begin()) continue;
-        if (CallInst *CI = dyn_cast<CallInst>(--I)) {
-          if (CI->getCalledFunction() == ResumeFn) {
-            Value *NegativeOne = Constant::getAllOnesValue(Int32Ty);
-            new StoreInst(NegativeOne, CallSite, true, I);  // volatile
-          }
+    // Mark call instructions that aren't nounwind as no-action
+    // (call_site == -1). Skip the entry block: at that point no function
+    // context has been created for this function yet, so any unexpected
+    // exceptions thrown will go directly to the caller's context, which is
+    // what we want anyway, so there is no need to do anything here.
+    for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+      for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+        if (CallInst *CI = dyn_cast<CallInst>(I)) {
+          // Ignore calls to the EH builtins (eh.selector, eh.exception)
+          Constant *Callee = CI->getCalledFunction();
+          if (Callee != SelectorFn && Callee != ExceptionFn
+              && !CI->doesNotThrow())
+            insertCallSiteStore(CI, -1, CallSite);
         }
-      }
+    }
 
     // Replace all unwinds with a branch to the unwind handler.
     // ??? Should this ever happen with sjlj exceptions?
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index ef6e12990d3d..3b3be5d9b1ad 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -550,8 +550,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   }
 
   // Exception Handling.
-  LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
-                                SectionKind::getReadOnlyWithRel());
+  LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
+                                SectionKind::getDataRel());
   EHFrameSection =
     getMachOSection("__TEXT", "__eh_frame",
                     MCSectionMachO::S_COALESCED |
@@ -652,7 +652,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 
   // FIXME: Alignment check should be handled by section classifier.
   if (Kind.isMergeable1ByteCString() ||
-      Kind.isMergeable2ByteCString()) {
+      (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage())) {
     if (TM.getTargetData()->getPreferredAlignment(
                                               cast<GlobalVariable>(GV)) < 32) {
       if (Kind.isMergeable1ByteCString())
@@ -779,7 +779,7 @@ unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
 }
 
 unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+  return DW_EH_PE_absptr;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0ba38433c220..c840b3968cd3 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -454,13 +454,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
                                      const TargetInstrInfo *TII,
                                      bool &IsCopy,
                                      unsigned &DstReg, bool &IsDstPhys) {
-  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg);
-  if (UI == MRI->use_nodbg_end())
-    return 0;
-  MachineInstr &UseMI = *UI;
-  if (++UI != MRI->use_nodbg_end())
-    // More than one use.
+  if (!MRI->hasOneNonDBGUse(Reg))
+    // None or more than one use.
     return 0;
+  MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
   if (UseMI.getParent() != MBB)
     return 0;
   unsigned SrcReg;
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 7bcd30a8e0e7..9d07811c896c 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CompilerDriver/BuiltinOptions.h"
 
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
 #include "llvm/System/Program.h"
 #include "llvm/System/TimeValue.h"
 
@@ -24,13 +25,23 @@
 using namespace llvm;
 using namespace llvmc;
 
+namespace llvmc {
+
+extern int Main(int argc, char** argv);
+extern const char* ProgramName;
+
+}
+
 namespace {
   int ExecuteProgram(const std::string& name,
                      const StrVector& args) {
     sys::Path prog = sys::Program::FindProgramByName(name);
 
-    if (prog.isEmpty())
-      throw std::runtime_error("Can't find program '" + name + "'");
+    if (prog.isEmpty()) {
+      prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
+      if (prog.isEmpty())
+        throw std::runtime_error("Can't find program '" + name + "'");
+    }
     if (!prog.canExecute())
       throw std::runtime_error("Program '" + name + "' is not executable.");
 
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 57c4375722c4..783ebb4deb61 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -156,53 +156,18 @@ namespace {
     // was no stub.  This function uses the call-site->function map to find a
     // relevant function, but asserts that only stubs and not other call sites
     // will be passed in.
-    Function *EraseStub(const MutexGuard &locked, void *Stub) {
-      CallSiteToFunctionMapTy::iterator C2F_I =
-        CallSiteToFunctionMap.find(Stub);
-      if (C2F_I == CallSiteToFunctionMap.end()) {
-        // Not a stub.
-        return NULL;
-      }
-
-      Function *const F = C2F_I->second;
-#ifndef NDEBUG
-      void *RealStub = FunctionToLazyStubMap.lookup(F);
-      assert(RealStub == Stub &&
-             "Call-site that wasn't a stub pass in to EraseStub");
-#endif
-      FunctionToLazyStubMap.erase(F);
-      CallSiteToFunctionMap.erase(C2F_I);
-
-      // Remove the stub from the function->call-sites map, and remove the whole
-      // entry from the map if that was the last call site.
-      FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
-      assert(F2C_I != FunctionToCallSitesMap.end() &&
-             "FunctionToCallSitesMap broken");
-      bool Erased = F2C_I->second.erase(Stub);
-      (void)Erased;
-      assert(Erased && "FunctionToCallSitesMap broken");
-      if (F2C_I->second.empty())
-        FunctionToCallSitesMap.erase(F2C_I);
-
-      return F;
-    }
+    Function *EraseStub(const MutexGuard &locked, void *Stub);
 
-    void EraseAllCallSites(const MutexGuard &locked, Function *F) {
+    void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) {
       assert(locked.holds(TheJIT->lock));
-      EraseAllCallSitesPrelocked(F);
-    }
-    void EraseAllCallSitesPrelocked(Function *F) {
-      FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
-      if (F2C == FunctionToCallSitesMap.end())
-        return;
-      for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
-             E = F2C->second.end(); I != E; ++I) {
-        bool Erased = CallSiteToFunctionMap.erase(*I);
-        (void)Erased;
-        assert(Erased && "Missing call site->function mapping");
-      }
-      FunctionToCallSitesMap.erase(F2C);
+      EraseAllCallSitesForPrelocked(F);
     }
+    void EraseAllCallSitesForPrelocked(Function *F);
+
+    // Erases _all_ call sites regardless of their function.  This is used to
+    // unregister the stub addresses from the StubToResolverMap in
+    // ~JITResolver().
+    void EraseAllCallSitesPrelocked();
   };
 
   /// JITResolver - Keep track of, and resolve, call sites for functions that
@@ -240,6 +205,8 @@ namespace {
       LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
     }
 
+    ~JITResolver();
+
     /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
     /// lazy-compilation stub if it has already been created.
     void *getLazyFunctionStubIfAvailable(Function *F);
@@ -259,8 +226,6 @@ namespace {
     void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
                            SmallVectorImpl<void*> &Ptrs);
 
-    GlobalValue *invalidateStub(void *Stub);
-
     /// getGOTIndexForAddress - Return a new or existing index in the GOT for
     /// an address.  This function only manages slots, it does not manage the
     /// contents of the slots or the memory associated with the GOT.
@@ -305,6 +270,17 @@ namespace {
       --I;
       return I->second;
     }
+    /// True if any stubs refer to the given resolver. Only used in an assert().
+    /// O(N)
+    bool ResolverHasStubs(JITResolver* Resolver) const {
+      MutexGuard guard(Lock);
+      for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+             E = Map.end(); I != E; ++I) {
+        if (I->second == Resolver)
+          return true;
+      }
+      return false;
+    }
   };
   /// This needs to be static so that a lazy call stub can access it with no
   /// context except the address of the stub.
@@ -370,9 +346,6 @@ namespace {
     /// MMI - Machine module info for exception informations
     MachineModuleInfo* MMI;
 
-    // GVSet - a set to keep track of which globals have been seen
-    SmallPtrSet<const GlobalVariable*, 8> GVSet;
-
     // CurFn - The llvm function being emitted.  Only valid during
     // finishFunction().
     const Function *CurFn;
@@ -396,16 +369,6 @@ namespace {
     ValueMap<const Function *, EmittedCode,
              EmittedFunctionConfig> EmittedFunctions;
 
-    // CurFnStubUses - For a given Function, a vector of stubs that it
-    // references.  This facilitates the JIT detecting that a stub is no
-    // longer used, so that it may be deallocated.
-    DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
-
-    // StubFnRefs - For a given pointer to a stub, a set of Functions which
-    // reference the stub.  When the count of a stub's references drops to zero,
-    // the stub is unused.
-    DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
-
     DILocation PrevDLT;
 
     /// Instance of the JIT
@@ -494,11 +457,6 @@ namespace {
     /// function body.
     void deallocateMemForFunction(const Function *F);
 
-    /// AddStubToCurrentFunction - Mark the current function being JIT'd as
-    /// using the stub at the specified address. Allows
-    /// deallocateMemForFunction to also remove stubs no longer referenced.
-    void AddStubToCurrentFunction(void *Stub);
-
     virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
 
     virtual void emitLabel(uint64_t LabelID) {
@@ -529,14 +487,86 @@ namespace {
                              bool MayNeedFarStub);
     void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
     unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
-    unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
-    unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+    unsigned addSizeOfGlobalsInConstantVal(
+      const Constant *C, unsigned Size,
+      SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+      SmallVectorImpl<const GlobalVariable*> &Worklist);
+    unsigned addSizeOfGlobalsInInitializer(
+      const Constant *Init, unsigned Size,
+      SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+      SmallVectorImpl<const GlobalVariable*> &Worklist);
     unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
   };
 }
 
 void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
-  JRS->EraseAllCallSitesPrelocked(F);
+  JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) {
+  CallSiteToFunctionMapTy::iterator C2F_I =
+    CallSiteToFunctionMap.find(Stub);
+  if (C2F_I == CallSiteToFunctionMap.end()) {
+    // Not a stub.
+    return NULL;
+  }
+
+  StubToResolverMap->UnregisterStubResolver(Stub);
+
+  Function *const F = C2F_I->second;
+#ifndef NDEBUG
+  void *RealStub = FunctionToLazyStubMap.lookup(F);
+  assert(RealStub == Stub &&
+         "Call-site that wasn't a stub passed in to EraseStub");
+#endif
+  FunctionToLazyStubMap.erase(F);
+  CallSiteToFunctionMap.erase(C2F_I);
+
+  // Remove the stub from the function->call-sites map, and remove the whole
+  // entry from the map if that was the last call site.
+  FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
+  assert(F2C_I != FunctionToCallSitesMap.end() &&
+         "FunctionToCallSitesMap broken");
+  bool Erased = F2C_I->second.erase(Stub);
+  (void)Erased;
+  assert(Erased && "FunctionToCallSitesMap broken");
+  if (F2C_I->second.empty())
+    FunctionToCallSitesMap.erase(F2C_I);
+
+  return F;
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+  FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+  if (F2C == FunctionToCallSitesMap.end())
+    return;
+  StubToResolverMapTy &S2RMap = *StubToResolverMap;
+  for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+         E = F2C->second.end(); I != E; ++I) {
+    S2RMap.UnregisterStubResolver(*I);
+    bool Erased = CallSiteToFunctionMap.erase(*I);
+    (void)Erased;
+    assert(Erased && "Missing call site->function mapping");
+  }
+  FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+  StubToResolverMapTy &S2RMap = *StubToResolverMap;
+  for (CallSiteToFunctionMapTy::const_iterator
+         I = CallSiteToFunctionMap.begin(),
+         E = CallSiteToFunctionMap.end(); I != E; ++I) {
+    S2RMap.UnregisterStubResolver(I->first);
+  }
+  CallSiteToFunctionMap.clear();
+  FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+  // No need to lock because we're in the destructor, and state isn't shared.
+  state.EraseAllCallSitesPrelocked();
+  assert(!StubToResolverMap->ResolverHasStubs(this) &&
+         "Resolver destroyed with stubs still alive.");
 }
 
 /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
@@ -589,20 +619,22 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
   DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
         << F->getName() << "'\n");
 
-  // Register this JITResolver as the one corresponding to this call site so
-  // JITCompilerFn will be able to find it.
-  StubToResolverMap->RegisterStubResolver(Stub, this);
-
-  // Finally, keep track of the stub-to-Function mapping so that the
-  // JITCompilerFn knows which function to compile!
-  state.AddCallSite(locked, Stub, F);
-
-  // If we are JIT'ing non-lazily but need to call a function that does not
-  // exist yet, add it to the JIT's work list so that we can fill in the stub
-  // address later.
-  if (!Actual && !TheJIT->isCompilingLazily())
-    if (!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage())
-      TheJIT->addPendingFunction(F);
+  if (TheJIT->isCompilingLazily()) {
+    // Register this JITResolver as the one corresponding to this call site so
+    // JITCompilerFn will be able to find it.
+    StubToResolverMap->RegisterStubResolver(Stub, this);
+
+    // Finally, keep track of the stub-to-Function mapping so that the
+    // JITCompilerFn knows which function to compile!
+    state.AddCallSite(locked, Stub, F);
+  } else if (!Actual) {
+    // If we are JIT'ing non-lazily but need to call a function that does not
+    // exist yet, add it to the JIT's work list so that we can fill in the
+    // stub address later.
+    assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+           "'Actual' should have been set above.");
+    TheJIT->addPendingFunction(F);
+  }
 
   return Stub;
 }
@@ -676,42 +708,6 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
   }
 }
 
-GlobalValue *JITResolver::invalidateStub(void *Stub) {
-  MutexGuard locked(TheJIT->lock);
-
-  // Remove the stub from the StubToResolverMap.
-  StubToResolverMap->UnregisterStubResolver(Stub);
-
-  GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
-  // Look up the cheap way first, to see if it's a function stub we are
-  // invalidating.  If so, remove it from both the forward and reverse maps.
-  if (Function *F = state.EraseStub(locked, Stub)) {
-    return F;
-  }
-
-  // Otherwise, it might be an indirect symbol stub.  Find it and remove it.
-  for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
-       i != e; ++i) {
-    if (i->second != Stub)
-      continue;
-    GlobalValue *GV = i->first;
-    GM.erase(i);
-    return GV;
-  }
-
-  // Lastly, check to see if it's in the ExternalFnToStubMap.
-  for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(),
-       e = ExternalFnToStubMap.end(); i != e; ++i) {
-    if (i->second != Stub)
-      continue;
-    ExternalFnToStubMap.erase(i);
-    break;
-  }
-
-  return 0;
-}
-
 /// JITCompilerFn - This function is called when a lazy compilation stub has
 /// been entered.  It looks up which function this stub corresponds to, compiles
 /// it if necessary, then returns the resultant function pointer.
@@ -797,7 +793,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
     // that we're returning the same address for the function as any previous
     // call.  TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
     // close enough to call.
-    AddStubToCurrentFunction(FnStub);
     return FnStub;
   }
 
@@ -814,18 +809,10 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
       return TheJIT->getPointerToFunction(F);
   }
 
-  // Otherwise, we may need a to emit a stub, and, conservatively, we
-  // always do so.
-  void *StubAddr = Resolver.getLazyFunctionStub(F);
-
-  // Add the stub to the current function's list of referenced stubs, so we can
-  // deallocate them if the current function is ever freed.  It's possible to
-  // return null from getLazyFunctionStub in the case of a weak extern that
-  // fails to resolve.
-  if (StubAddr)
-    AddStubToCurrentFunction(StubAddr);
-
-  return StubAddr;
+  // Otherwise, we may need to emit a stub, and, conservatively, we always do
+  // so.  Note that it's possible to return null from getLazyFunctionStub in the
+  // case of a weak extern that fails to resolve.
+  return Resolver.getLazyFunctionStub(F);
 }
 
 void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
@@ -833,24 +820,9 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
   // resolved address.
   void *GVAddress = getPointerToGlobal(V, Reference, false);
   void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
-
-  // Add the stub to the current function's list of referenced stubs, so we can
-  // deallocate them if the current function is ever freed.
-  AddStubToCurrentFunction(StubAddr);
-
   return StubAddr;
 }
 
-void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
-  assert(CurFn && "Stub added to current function, but current function is 0!");
-
-  SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
-  StubsUsed.push_back(StubAddr);
-
-  SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr];
-  FnRefs.insert(CurFn);
-}
-
 void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
   if (!DL.isUnknown()) {
     DILocation CurDLT = EmissionDetails.MF->getDILocation(DL);
@@ -922,11 +894,14 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
 }
 
 /// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
-/// but are referenced from the constant; put them in GVSet and add their
-/// size into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
-                                              unsigned Size) {
+/// but are referenced from the constant; put them in SeenGlobals and the
+/// Worklist, and add their size into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
+    const Constant *C,
+    unsigned Size,
+    SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+    SmallVectorImpl<const GlobalVariable*> &Worklist) {
   // If its undefined, return the garbage.
   if (isa<UndefValue>(C))
     return Size;
@@ -948,7 +923,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
     case Instruction::PtrToInt:
     case Instruction::IntToPtr:
     case Instruction::BitCast: {
-      Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+      Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
       break;
     }
     case Instruction::Add:
@@ -964,8 +939,9 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      Size = addSizeOfGlobalsInConstantVal(Op0, Size);
-      Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size);
+      Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
+      Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size,
+                                           SeenGlobals, Worklist);
       break;
     }
     default: {
@@ -979,8 +955,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
 
   if (C->getType()->getTypeID() == Type::PointerTyID)
     if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
-      if (GVSet.insert(GV))
+      if (SeenGlobals.insert(GV)) {
+        Worklist.push_back(GV);
         Size = addSizeOfGlobal(GV, Size);
+      }
 
   return Size;
 }
@@ -988,15 +966,18 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
 /// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet
 /// but are referenced from the given initializer.
 
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
-                                              unsigned Size) {
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(
+    const Constant *Init,
+    unsigned Size,
+    SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+    SmallVectorImpl<const GlobalVariable*> &Worklist) {
   if (!isa<UndefValue>(Init) &&
       !isa<ConstantVector>(Init) &&
       !isa<ConstantAggregateZero>(Init) &&
       !isa<ConstantArray>(Init) &&
       !isa<ConstantStruct>(Init) &&
       Init->getType()->isFirstClassType())
-    Size = addSizeOfGlobalsInConstantVal(Init, Size);
+    Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist);
   return Size;
 }
 
@@ -1007,7 +988,7 @@ unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
 
 unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
   unsigned Size = 0;
-  GVSet.clear();
+  SmallPtrSet<const GlobalVariable*, 8> SeenGlobals;
 
   for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
        MBB != E; ++MBB) {
@@ -1031,7 +1012,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
           // assuming the addresses of the new globals in this module
           // start at 0 (or something) and adjusting them after codegen
           // complete.  Another possibility is to grab a marker bit in GV.
-          if (GVSet.insert(GV))
+          if (SeenGlobals.insert(GV))
             // A variable as yet unseen.  Add in its size.
             Size = addSizeOfGlobal(GV, Size);
         }
@@ -1040,12 +1021,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
   }
   DEBUG(dbgs() << "JIT: About to look through initializers\n");
   // Look for more globals that are referenced only from initializers.
-  // GVSet.end is computed each time because the set can grow as we go.
-  for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
-       I != GVSet.end(); I++) {
-    const GlobalVariable* GV = *I;
+  SmallVector<const GlobalVariable*, 8> Worklist(
+    SeenGlobals.begin(), SeenGlobals.end());
+  while (!Worklist.empty()) {
+    const GlobalVariable* GV = Worklist.back();
+    Worklist.pop_back();
     if (GV->hasInitializer())
-      Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size);
+      Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size,
+                                           SeenGlobals, Worklist);
   }
 
   return Size;
@@ -1347,40 +1330,6 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
   if (JITEmitDebugInfo) {
     DR->UnregisterFunction(F);
   }
-
-  // If the function did not reference any stubs, return.
-  if (CurFnStubUses.find(F) == CurFnStubUses.end())
-    return;
-
-  // For each referenced stub, erase the reference to this function, and then
-  // erase the list of referenced stubs.
-  SmallVectorImpl<void *> &StubList = CurFnStubUses[F];
-  for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
-    void *Stub = StubList[i];
-
-    // If we already invalidated this stub for this function, continue.
-    if (StubFnRefs.count(Stub) == 0)
-      continue;
-
-    SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub];
-    FnRefs.erase(F);
-
-    // If this function was the last reference to the stub, invalidate the stub
-    // in the JITResolver.  Were there a memory manager deallocateStub routine,
-    // we could call that at this point too.
-    if (FnRefs.empty()) {
-      DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n");
-      StubFnRefs.erase(Stub);
-
-      // Invalidate the stub.  If it is a GV stub, update the JIT's global
-      // mapping for that GV to zero.
-      GlobalValue *GV = Resolver.invalidateStub(Stub);
-      if (GV) {
-        TheJIT->updateGlobalMapping(GV, 0);
-      }
-    }
-  }
-  CurFnStubUses.erase(F);
 }
 
 
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 25c3fbdef32a..071c924d9195 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -368,6 +368,7 @@ void format_object_base::home() {
 /// if no error occurred.
 raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
                                unsigned Flags) : pos(0) {
+  assert(Filename != 0 && "Filename is null");
   // Verify that we don't have both "append" and "excl".
   assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
          "Cannot specify both 'excl' and 'append' file creation flags!");
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 80449668caf2..577c36318f73 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1350,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const {
   unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
   unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
   unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
-  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  if (STI.isTargetDarwin() || hasFP(MF))
+    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+                                NumBytes);
   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 1c77f2701472..786dd65b8742 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -734,7 +734,7 @@ def tMOVgpr2gpr  : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
 // multiply register
 let isCommutable = 1 in
 def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
-                 "mul", "\t$dst, $rhs",
+                 "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */
                  [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>,
            T1DataProcessing<0b1101>;
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 316567d0ab9b..6241766a32df 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>,
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
 
-// t2addrmode_imm8  := reg - imm8
+// t2addrmode_imm8  := reg +/- imm8
 def t2addrmode_imm8 : Operand<i32>,
                       ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
   let PrintMethod = "printT2AddrModeImm8Operand";
@@ -657,6 +657,32 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
    }
 }
 
+// SXTB16 and UXTB16 do not need the .w qualifier.
+multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+  def r     : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+                  opc, "\t$dst, $src",
+                 [(set GPR:$dst, (opnode GPR:$src))]> {
+     let Inst{31-27} = 0b11111;
+     let Inst{26-23} = 0b0100;
+     let Inst{22-20} = opcod;
+     let Inst{19-16} = 0b1111; // Rn
+     let Inst{15-12} = 0b1111;
+     let Inst{7} = 1;
+     let Inst{5-4} = 0b00; // rotate
+   }
+  def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+                  opc, "\t$dst, $src, ror $rot",
+                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+     let Inst{31-27} = 0b11111;
+     let Inst{26-23} = 0b0100;
+     let Inst{22-20} = opcod;
+     let Inst{19-16} = 0b1111; // Rn
+     let Inst{15-12} = 0b1111;
+     let Inst{7} = 1;
+     let Inst{5-4} = {?,?}; // rotate
+   }
+}
+
 // DO variant - disassembly only, no pattern
 
 multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
@@ -983,6 +1009,28 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
                             []>;
 }
 
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
+// for disassembly only.
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc>
+  : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
+          "\t$dst, $addr", []> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-25} = 0b00;
+  let Inst{24} = signed;
+  let Inst{23} = 0;
+  let Inst{22-21} = type;
+  let Inst{20} = 1; // load
+  let Inst{11} = 1;
+  let Inst{10-8} = 0b110; // PUW.
+}
+
+def t2LDRT   : T2IldT<0, 0b10, "ldrt">;
+def t2LDRBT  : T2IldT<0, 0b00, "ldrbt">;
+def t2LDRHT  : T2IldT<0, 0b01, "ldrht">;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">;
+
 // Store
 defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
 defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
@@ -1037,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
         [(set GPR:$base_wb,
               (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
 
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.  NOTE(review): $src sits in (outs) though this is a store - confirm.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc>
+  : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc,
+          "\t$src, $addr", []> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-25} = 0b00;
+  let Inst{24} = 0; // not signed
+  let Inst{23} = 0;
+  let Inst{22-21} = type;
+  let Inst{20} = 0; // store
+  let Inst{11} = 1;
+  let Inst{10-8} = 0b110; // PUW
+}
+
+def t2STRT   : T2IstT<0b10, "strt">;
+def t2STRBT  : T2IstT<0b00, "strbt">;
+def t2STRHT  : T2IstT<0b01, "strht">;
 
 // FIXME: ldrd / strd pre / post variants
 
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access.  These are for disassembly only.
+multiclass T2Ipl<bit instr, bit write, string opc> {
+
+  def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc,
+                "\t$addr", []> {
+    let Inst{31-25} = 0b1111100;
+    let Inst{24} = instr;
+    let Inst{23} = 1; // U = 1
+    let Inst{22} = 0;
+    let Inst{21} = write;
+    let Inst{20} = 1;
+    let Inst{15-12} = 0b1111;
+  }
+
+  def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
+                "\t$addr", []> {
+    let Inst{31-25} = 0b1111100;
+    let Inst{24} = instr;
+    let Inst{23} = 0; // U = 0
+    let Inst{22} = 0;
+    let Inst{21} = write;
+    let Inst{20} = 1;
+    let Inst{15-12} = 0b1111;
+    let Inst{11-8} = 0b1100;
+  }
+
+  // A8.6.118 #0 and #-0 differ.  Translates -0 to -1, -1 to -2, ..., etc.
+  def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
+                "\t[pc, ${imm:negzero}]", []> {
+    let Inst{31-25} = 0b1111100;
+    let Inst{24} = instr;
+    let Inst{23} = ?; // add = (U == 1)
+    let Inst{22} = 0;
+    let Inst{21} = write;
+    let Inst{20} = 1;
+    let Inst{19-16} = 0b1111; // Rn = 0b1111
+    let Inst{15-12} = 0b1111;
+  }
+
+  def r   : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc,
+                "\t[$base, $a]", []> {
+    let Inst{31-25} = 0b1111100;
+    let Inst{24} = instr;
+    let Inst{23} = 0; // add = TRUE for T1
+    let Inst{22} = 0;
+    let Inst{21} = write;
+    let Inst{20} = 1;
+    let Inst{15-12} = 0b1111;
+    let Inst{11-6} = 0b000000; // six-bit field, all zero
+    let Inst{5-4} = 0b00; // no shift is applied
+  }
+
+  def s   : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc,
+                "\t[$base, $a, lsl $shamt]", []> {
+    let Inst{31-25} = 0b1111100;
+    let Inst{24} = instr;
+    let Inst{23} = 0; // add = TRUE for T1
+    let Inst{22} = 0;
+    let Inst{21} = write;
+    let Inst{20} = 1;
+    let Inst{15-12} = 0b1111;
+    let Inst{11-6} = 0b000000; // Inst{5-4} (shift) is left unset here
+  }
+}
+
+defm t2PLD  : T2Ipl<0, 0, "pld">;
+defm t2PLDW : T2Ipl<0, 1, "pldw">;
+defm t2PLI  : T2Ipl<1, 0, "pli">;
+
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //
@@ -1149,7 +1286,7 @@ defm t2UXTB   : T2I_unary_rrot<0b101, "uxtb",
                                UnOpFrag<(and node:$Src, 0x000000FF)>>;
 defm t2UXTH   : T2I_unary_rrot<0b001, "uxth",
                                UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16",
+defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16",
                                UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
 
 def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5b4f02d6b75b..19f1e3ba12cf 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -761,6 +761,11 @@ static bool isMemoryOp(const MachineInstr *MI) {
       MI->getOperand(0).isUndef())
     return false;
 
+  // Likewise don't mess with references to undefined addresses.
+  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+      MI->getOperand(1).isUndef())
+    return false;
+
   int Opcode = MI->getOpcode();
   switch (Opcode) {
   default: break;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index b61ce29aab80..163d1e9a0dd2 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
 }
 
 static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
-  return (MI->getOpcode() == ARM::tRestore &&
-          MI->getOperand(1).isFI() &&
-          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+  if (MI->getOpcode() == ARM::tRestore &&
+      MI->getOperand(1).isFI() &&
+      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+    return true;
+  if (MI->getOpcode() == ARM::tPOP) {
+    // The first three operands are predicates and such. The last two are
+    // imp-def and imp-use of SP. Check everything in between ('<' stops safely).
+    for (int i = 3, e = MI->getNumOperands() - 2; i < e; ++i)
+      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+        return false;
+    return true;
+  }
+  return false;
 }
 
 void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
@@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
   int NumBytes = (int)MFI->getStackSize();
+  const unsigned *CSRegs = getCalleeSavedRegs();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
       emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
   } else {
     // Unwind MBBI to point to first LDR / VLDRD.
-    const unsigned *CSRegs = getCalleeSavedRegs();
     if (MBBI != MBB.begin()) {
       do
         --MBBI;
@@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
   }
 
   if (VARegSaveSize) {
+    // Move back past the callee-saved register restoration
+    while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+      ++MBBI;
     // Epilogue for vararg functions: pop LR to R3 and branch off it.
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
       .addReg(0) // No write back.
@@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
 
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
       .addReg(ARM::R3, RegState::Kill);
+    // erase the old tBX_RET instruction
     MBB.erase(MBBI);
   }
 }
diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td
index 38ada69c2425..bde8819f46e4 100644
--- a/lib/Target/Alpha/AlphaCallingConv.td
+++ b/lib/Target/Alpha/AlphaCallingConv.td
@@ -14,7 +14,8 @@
 //===----------------------------------------------------------------------===//
 def RetCC_Alpha : CallingConv<[
   // i64 is returned in register R0
-  CCIfType<[i64], CCAssignToReg<[R0]>>,
+  // R1 is an llvm extension, I don't know what gcc does
+  CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
 
   // f32 / f64 are returned in F0/F1
   CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 5ef3c6b24de7..3e17a51b505f 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
   // Exception handling is not supported on CellSPU (think about it: you only
   // have 256K for code+data. Would you support exception handling?)
   ExceptionsType = ExceptionHandling::None;
+
+  // SPU assembly requires ".section" before ".bss" 
+  UsesELFSectionDirectiveForBSS = true;  
 }
 
diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
index 46cc81967ebd..f1bdb1210fc6 100644
--- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
+++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
@@ -15,7 +15,8 @@ using namespace llvm;
 Target llvm::ThePIC16Target, llvm::TheCooperTarget;
 
 extern "C" void LLVMInitializePIC16TargetInfo() { 
-  RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]");
+  RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16",
+                                  "PIC16 14-bit [experimental]");
 
   RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]");
 }
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index d6b45be293c0..f6753a637cd0 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -204,9 +204,10 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 /// isBlockOnlyReachableByFallthough - Return true if the basic block has
 /// exactly one predecessor and the control transfer mechanism between
 /// the predecessor and this block is a fall-through.
-/// Override AsmPrinter implementation to handle delay slots
-bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) 
-    const {
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   // If this is a landing pad, it isn't a fall through.  If it has no preds,
   // then nothing falls through to it.
   if (MBB->isLandingPad() || MBB->pred_empty())
@@ -224,10 +225,10 @@ bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock
   if (!Pred->isLayoutSuccessor(MBB))
     return false;
   
-  // Check if the last terminator is an unconditional branch
+  // Check if the last terminator is an unconditional branch.
   MachineBasicBlock::const_iterator I = Pred->end();
-  while( I != Pred->begin() && !(--I)->getDesc().isTerminator() )
-      ; /* Noop */
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+    ; // Noop
   return I == Pred->end() || !I->getDesc().isBarrier();
 }
 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 17366ee8bcb4..98e3f4efe588 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
   }
 
   case Instruction::GetElementPtr: {
+    X86AddressMode SavedAM = AM;
+
     // Pattern-match simple GEPs.
     uint64_t Disp = (int32_t)AM.Disp;
     unsigned IndexReg = AM.IndexReg;
@@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
     AM.IndexReg = IndexReg;
     AM.Scale = Scale;
     AM.Disp = (uint32_t)Disp;
-    return X86SelectAddress(U->getOperand(0), AM);
+    if (X86SelectAddress(U->getOperand(0), AM))
+      return true;
+    
+    // If we couldn't merge the sub value into this addr mode, revert back to
+    // our address and just match the value instead of completely failing.
+    AM = SavedAM;
+    break;
   unsupported_gep:
     // Ok, the GEP indices weren't all covered.
     break;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 08030e0630fc..3fad8ade41ba 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -413,6 +413,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
 }
 
 void X86DAGToDAGISel::PreprocessISelDAG() {
+  // OptForSize is used in pattern predicates that isel is matching.
   OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
   
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e2b81930aecf..8384ab753a93 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -990,7 +990,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
-  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
@@ -2236,7 +2235,8 @@ static
 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                          const X86InstrInfo *TII) {
-  int FI;
+  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+  int FI = INT_MAX;
   if (Arg.getOpcode() == ISD::CopyFromReg) {
     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
     if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
@@ -2252,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
           Def->getOperand(1).isFI()) {
         FI = Def->getOperand(1).getIndex();
-        if (MFI->getObjectSize(FI) != Flags.getByValSize())
-          return false;
+        Bytes = Flags.getByValSize();
       } else
         return false;
     }
-  } else {
-    LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg);
-    if (!Ld)
+  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+    if (Flags.isByVal())
+      // ByVal argument is passed in as a pointer but it's now being
+      // dereferenced. e.g.
+      // define @foo(%struct.X* %A) {
+      //   tail call @bar(%struct.X* byval %A)
+      // }
       return false;
     SDValue Ptr = Ld->getBasePtr();
     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
     if (!FINode)
       return false;
     FI = FINode->getIndex();
-  }
+  } else
+    return false;
 
+  assert(FI != INT_MAX);
   if (!MFI->isFixedObjectIndex(FI))
     return false;
-  return Offset == MFI->getObjectOffset(FI);
+  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
 }
 
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
@@ -9174,58 +9179,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// PerformANDCombine - Look for SSE and instructions of this form:
-/// (and x, (build_vector signbit,signbit,signbit,signbit)). If there
-/// exists a use of a build_vector that's the bitwise complement of the mask,
-/// then transform the node to
-/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~sb,~sb,~sb,~sb)).
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
-                                 TargetLowering::DAGCombinerInfo &DCI) {
-  EVT VT = N->getValueType(0);
-  if (!VT.isVector() || !VT.isInteger())
-    return SDValue();
-
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse())
-    return SDValue();
-
-  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
-    unsigned NumElts = VT.getVectorNumElements();
-    EVT EltVT = VT.getVectorElementType();
-    SmallVector<SDValue, 8> Mask;
-    Mask.reserve(NumElts);
-    for (unsigned i = 0; i != NumElts; ++i) {
-      SDValue Arg = N1.getOperand(i);
-      if (Arg.getOpcode() == ISD::UNDEF) {
-        Mask.push_back(Arg);
-        continue;
-      }
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg);
-      if (!C)
-        return SDValue();
-      if (!C->getAPIntValue().isSignBit() &&
-          !C->getAPIntValue().isMaxSignedValue())
-        return SDValue();
-      Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT));
-    }
-    N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT,
-                     &Mask[0], NumElts);
-    if (!N1.use_empty()) {
-      unsigned Bits = EltVT.getSizeInBits();
-      Mask.clear();
-      for (unsigned i = 0; i != NumElts; ++i)
-        Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT));
-      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                                    VT, &Mask[0], NumElts);
-      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                         DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
-                                     N0, NewMask), N1);
-    }
-  }
-
-  return SDValue();
-}
 
 /// PerformMulCombine - Optimize a single multiply with constant into two
 /// in order to implement it with two cheaper instructions, e.g.
@@ -9755,7 +9708,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
-  case ISD::AND:            return PerformANDCombine(N, DAG, DCI);
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
@@ -9838,11 +9790,20 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
     // rorw $$8, ${0:w}  -->  llvm.bswap.i16
     if (CI->getType()->isIntegerTy(16) &&
         AsmPieces.size() == 3 &&
-        AsmPieces[0] == "rorw" &&
+        (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
         AsmPieces[1] == "$$8," &&
         AsmPieces[2] == "${0:w}" &&
-        IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
-      return LowerToBSwap(CI);
+        IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+      AsmPieces.clear();
+      SplitString(IA->getConstraintString().substr(5), AsmPieces, ",");
+      std::sort(AsmPieces.begin(), AsmPieces.end());
+      if (AsmPieces.size() == 4 &&
+          AsmPieces[0] == "~{cc}" &&
+          AsmPieces[1] == "~{dirflag}" &&
+          AsmPieces[2] == "~{flags}" &&
+          AsmPieces[3] == "~{fpsr}") {
+        return LowerToBSwap(CI);
+      }
     }
     break;
   case 3:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index cfe71a5acbcc..d46b9469f10c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1050,7 +1050,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
 //
 
 // Extra precision multiplication
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
                // This probably ought to be moved to a def : Pat<> if the
@@ -1068,7 +1071,7 @@ def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src),
                "mul{l}\t$src",
                []>; // EAX,EDX = EAX*GR32
 
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
                "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
@@ -1090,7 +1093,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
 }
 
 let neverHasSideEffects = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", []>;
               // AL,AH = AL*GR8
 let Defs = [AX,DX,EFLAGS], Uses = [AX] in
@@ -1100,7 +1103,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
 def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", []>;
               // EAX,EDX = EAX*GR32
 let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
 def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
                 "imul{b}\t$src", []>;    // AL,AH = AL*[mem8]
 let Defs = [AX,DX,EFLAGS], Uses = [AX] in
@@ -1113,7 +1116,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
 } // neverHasSideEffects
 
 // unsigned division/remainder
-let Defs = [AX,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
                "div{b}\t$src", []>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1123,7 +1126,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
 def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
                "div{l}\t$src", []>;
 let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
                "div{b}\t$src", []>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1136,7 +1139,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
 }
 
 // Signed division/remainder.
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
                "idiv{b}\t$src", []>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1146,7 +1149,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
 def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
                "idiv{l}\t$src", []>;
 let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
 def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
                "idiv{b}\t$src", []>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 09accb6b8572..07fb15eb8256 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -117,11 +117,11 @@ public:
   Instruction *visitUDiv(BinaryOperator &I);
   Instruction *visitSDiv(BinaryOperator &I);
   Instruction *visitFDiv(BinaryOperator &I);
-  Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
-  Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+  Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+  Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
-  Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
-  Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+  Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+  Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
                                    Value *A, Value *B, Value *C);
   Instruction *visitOr (BinaryOperator &I);
@@ -327,8 +327,8 @@ private:
   
   Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
                             bool isSub, Instruction &I);
-  Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
-                               bool isSigned, bool Inside, Instruction &IB);
+  Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                         bool isSigned, bool Inside);
   Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
   Instruction *MatchBSwap(BinaryOperator &I);
   bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 86673f809624..3fb3de75075e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -137,80 +137,44 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
 /// opcode and two operands into either a constant true or false, or a brand 
 /// new ICmp instruction. The sign is passed in to determine which kind
 /// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) {
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+                           InstCombiner::BuilderTy *Builder) {
+  CmpInst::Predicate Pred;
   switch (Code) {
   default: assert(0 && "Illegal ICmp code!");
-  case 0:
-    return ConstantInt::getFalse(LHS->getContext());
-  case 1: 
-    if (Sign)
-      return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
-    return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
-  case 2:
-    return new ICmpInst(ICmpInst::ICMP_EQ,  LHS, RHS);
-  case 3: 
-    if (Sign)
-      return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
-    return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
-  case 4: 
-    if (Sign)
-      return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
-    return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
-  case 5:
-    return new ICmpInst(ICmpInst::ICMP_NE,  LHS, RHS);
-  case 6: 
-    if (Sign)
-      return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
-    return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
-  case 7:
-    return ConstantInt::getTrue(LHS->getContext());
+  case 0: // False.
+    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+  case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+  case 2: Pred = ICmpInst::ICMP_EQ; break;
+  case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+  case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+  case 5: Pred = ICmpInst::ICMP_NE; break;
+  case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+  case 7: // True.
+    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
   }
+  return Builder->CreateICmp(Pred, LHS, RHS);
 }
 
 /// getFCmpValue - This is the complement of getFCmpCode, which turns an
 /// opcode and two operands into either a FCmp instruction. isordered is passed
 /// in to determine which kind of predicate to use in the new fcmp instruction.
 static Value *getFCmpValue(bool isordered, unsigned code,
-                           Value *LHS, Value *RHS) {
+                           Value *LHS, Value *RHS,
+                           InstCombiner::BuilderTy *Builder) {
+  CmpInst::Predicate Pred;
   switch (code) {
-  default: llvm_unreachable("Illegal FCmp code!");
-  case  0:
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
-  case  1: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
-  case  2: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
-  case  3: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
-  case  4: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
-  case  5: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
-  case  6: 
-    if (isordered)
-      return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
-    else
-      return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
-  case  7: return ConstantInt::getTrue(LHS->getContext());
+  default: llvm_unreachable("Illegal FCmp code!");
+  case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+  case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+  case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+  case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+  case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+  case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+  case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+  case 7: return ConstantInt::getTrue(LHS->getContext());
   }
+  return Builder->CreateFCmp(Pred, LHS, RHS);
 }
 
 /// PredicatesFoldable - Return true if both predicates match sign or if at
@@ -355,40 +319,39 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 /// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
-Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
-                                           bool isSigned, bool Inside, 
-                                           Instruction &IB) {
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                                     bool isSigned, bool Inside) {
   assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? 
             ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
          "Lo is not <= Hi in range emission code!");
     
   if (Inside) {
     if (Lo == Hi)  // Trivially false.
-      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+      return ConstantInt::getFalse(V->getContext());
 
     // V >= Min && V < Hi --> V < Hi
     if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
       ICmpInst::Predicate pred = (isSigned ? 
         ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
-      return new ICmpInst(pred, V, Hi);
+      return Builder->CreateICmp(pred, V, Hi);
     }
 
     // Emit V-Lo <u Hi-Lo
     Constant *NegLo = ConstantExpr::getNeg(Lo);
     Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
     Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
-    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+    return Builder->CreateICmpULT(Add, UpperBound);
   }
 
   if (Lo == Hi)  // Trivially true.
-    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
+    return ConstantInt::getTrue(V->getContext());
 
   // V < Min || V >= Hi -> V > Hi-1
   Hi = SubOne(cast<ConstantInt>(Hi));
   if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
     ICmpInst::Predicate pred = (isSigned ? 
         ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
-    return new ICmpInst(pred, V, Hi);
+    return Builder->CreateICmp(pred, V, Hi);
   }
 
   // Emit V-Lo >u Hi-1-Lo
@@ -396,7 +359,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
   ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
   Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
   Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
-  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
+  return Builder->CreateICmpUGT(Add, LowerBound);
 }
 
 // isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
@@ -472,8 +435,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
 }
 
 /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
-Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
-                                          ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
 
   // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -486,11 +448,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
       Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
       unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
       bool isSigned = LHS->isSigned() || RHS->isSigned();
-      Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
-      if (Instruction *I = dyn_cast<Instruction>(RV))
-        return I;
-      // Otherwise, it's a constant boolean value.
-      return ReplaceInstUsesWith(I, RV);
+      return getICmpValue(isSigned, Code, Op0, Op1, Builder);
     }
   }
   
@@ -506,13 +464,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     if (LHSCC == ICmpInst::ICMP_ULT &&
         LHSCst->getValue().isPowerOf2()) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
     
     // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
     if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
   }
   
@@ -562,33 +520,32 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
     case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
     case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
     case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     }
   case ICmpInst::ICMP_NE:
     switch (RHSCC) {
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_ULT:
       if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
-        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
+        return Builder->CreateICmpULT(Val, LHSCst);
       break;                        // (X != 13 & X u< 15) -> no change
     case ICmpInst::ICMP_SLT:
       if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
-        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
+        return Builder->CreateICmpSLT(Val, LHSCst);
       break;                        // (X != 13 & X s< 15) -> no change
     case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
     case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
     case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     case ICmpInst::ICMP_NE:
       if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
         Constant *AddCST = ConstantExpr::getNeg(LHSCst);
         Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
-                            ConstantInt::get(Add->getType(), 1));
+        return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
       }
       break;                        // (X != 13 & X != 15) -> no change
     }
@@ -598,12 +555,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
     case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
       break;
     }
@@ -613,12 +570,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
     case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
     case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
       break;
     }
@@ -628,16 +585,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
     case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
       if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
-        return new ICmpInst(LHSCC, Val, RHSCst);
+        return Builder->CreateICmp(LHSCC, Val, RHSCst);
       break;                        // (X u> 13 & X != 15) -> no change
     case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
-      return InsertRangeTest(Val, AddOne(LHSCst),
-                             RHSCst, false, true, I);
+      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
     case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
       break;
     }
@@ -647,16 +603,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
     case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
       if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
-        return new ICmpInst(LHSCC, Val, RHSCst);
+        return Builder->CreateICmp(LHSCC, Val, RHSCst);
       break;                        // (X s> 13 & X != 15) -> no change
     case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
-      return InsertRangeTest(Val, AddOne(LHSCst),
-                             RHSCst, true, true, I);
+      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
     case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
       break;
     }
@@ -666,9 +621,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
   return 0;
 }
 
-Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
-                                          FCmpInst *RHS) {
-  
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
       RHS->getPredicate() == FCmpInst::FCMP_ORD) {
     // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
@@ -677,17 +633,15 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
         // If either of the constants are nans, then the whole thing returns
         // false.
         if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
-        return new FCmpInst(FCmpInst::FCMP_ORD,
-                            LHS->getOperand(0), RHS->getOperand(0));
+          return ConstantInt::getFalse(LHS->getContext());
+        return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
       }
     
     // Handle vector zeros.  This occurs because the canonical form of
     // "fcmp ord x,x" is "fcmp ord x, 0".
     if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
         isa<ConstantAggregateZero>(RHS->getOperand(1)))
-      return new FCmpInst(FCmpInst::FCMP_ORD,
-                          LHS->getOperand(0), RHS->getOperand(0));
+      return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
     return 0;
   }
   
@@ -705,14 +659,13 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
   if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
     // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
     if (Op0CC == Op1CC)
-      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
-    
+      return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
     if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     if (Op0CC == FCmpInst::FCMP_TRUE)
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     if (Op1CC == FCmpInst::FCMP_TRUE)
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     
     bool Op0Ordered;
     bool Op1Ordered;
@@ -727,14 +680,14 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
       // uno && ueq -> uno && (uno || eq) -> ueq
       // ord && olt -> ord && (ord && lt) -> olt
       if (Op0Ordered == Op1Ordered)
-        return ReplaceInstUsesWith(I, RHS);
+        return RHS;
       
       // uno && oeq -> uno && (ord && eq) -> false
       // uno && ord -> false
       if (!Op0Ordered)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
       // ord && ueq -> ord && (uno || eq) -> oeq
-      return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
+      return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
     }
   }
 
@@ -930,14 +883,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
-      if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
-        return Res;
+      if (Value *Res = FoldAndOfICmps(LHS, RHS))
+        return ReplaceInstUsesWith(I, Res);
   
   // If and'ing two fcmp, try combine them into one.
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
+      if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+        return ReplaceInstUsesWith(I, Res);
   
   
   // fold (and (cast A), (cast B)) -> (cast (and A, B))
@@ -960,19 +913,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
           if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
-            if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
-              InsertNewInstBefore(Res, I);
+            if (Value *Res = FoldAndOfICmps(LHS, RHS))
               return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-            }
         
         // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
           if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
-            if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
-              InsertNewInstBefore(Res, I);
+            if (Value *Res = FoldAndOfFCmps(LHS, RHS))
               return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-            }
       }
     }
     
@@ -1179,8 +1128,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
 }
 
 /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
-Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
-                                         ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
 
   // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
@@ -1193,11 +1141,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
       unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
       bool isSigned = LHS->isSigned() || RHS->isSigned();
-      Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
-      if (Instruction *I = dyn_cast<Instruction>(RV))
-        return I;
-      // Otherwise, it's a constant boolean value.
-      return ReplaceInstUsesWith(I, RV);
+      return getICmpValue(isSigned, Code, Op0, Op1, Builder);
     }
   }
   
@@ -1211,7 +1155,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
   if (LHSCst == RHSCst && LHSCC == RHSCC &&
       LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
     Value *NewOr = Builder->CreateOr(Val, Val2);
-    return new ICmpInst(LHSCC, NewOr, LHSCst);
+    return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
   }
   
   // From here on, we only handle:
@@ -1263,7 +1207,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
         Constant *AddCST = ConstantExpr::getNeg(LHSCst);
         Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
         AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
-        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+        return Builder->CreateICmpULT(Add, AddCST);
       }
       break;                         // (X == 13 | X == 15) -> no change
     case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
@@ -1272,7 +1216,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     case ICmpInst::ICMP_NE:          // (X == 13 | X != 15) -> X != 15
     case ICmpInst::ICMP_ULT:         // (X == 13 | X u< 15) -> X u< 15
     case ICmpInst::ICMP_SLT:         // (X == 13 | X s< 15) -> X s< 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     }
     break;
   case ICmpInst::ICMP_NE:
@@ -1281,11 +1225,11 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
     case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
     case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
     case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ConstantInt::getTrue(LHS->getContext());
     }
     break;
   case ICmpInst::ICMP_ULT:
@@ -1297,14 +1241,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // If RHSCst is [us]MAXINT, it is always false.  Not handling
       // this can cause overflow.
       if (RHSCst->isMaxValue(false))
-        return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
-                             false, false, I);
+        return LHS;
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
     case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
     case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
       break;
     }
@@ -1318,14 +1261,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // If RHSCst is [us]MAXINT, it is always false.  Not handling
       // this can cause overflow.
       if (RHSCst->isMaxValue(true))
-        return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
-                             true, false, I);
+        return LHS;
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
     case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s< 13 | X != 15) -> X != 15
     case ICmpInst::ICMP_SLT:        // (X s< 13 | X s< 15) -> X s< 15
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     case ICmpInst::ICMP_ULT:        // (X s< 13 | X u< 15) -> no change
       break;
     }
@@ -1335,12 +1277,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13
     case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     case ICmpInst::ICMP_SGT:        // (X u> 13 | X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ConstantInt::getTrue(LHS->getContext());
     case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change
       break;
     }
@@ -1350,12 +1292,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X > 13
     case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X > 13
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     case ICmpInst::ICMP_UGT:        // (X s> 13 | X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true
     case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ConstantInt::getTrue(LHS->getContext());
     case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change
       break;
     }
@@ -1364,8 +1306,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
   return 0;
 }
 
-Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
-                                         FCmpInst *RHS) {
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp).  NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
       RHS->getPredicate() == FCmpInst::FCMP_UNO && 
       LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
@@ -1374,20 +1318,18 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
         // If either of the constants are nans, then the whole thing returns
         // true.
         if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+          return ConstantInt::getTrue(LHS->getContext());
         
         // Otherwise, no need to compare the two constants, compare the
         // rest.
-        return new FCmpInst(FCmpInst::FCMP_UNO,
-                            LHS->getOperand(0), RHS->getOperand(0));
+        return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
       }
     
     // Handle vector zeros.  This occurs because the canonical form of
     // "fcmp uno x,x" is "fcmp uno x, 0".
     if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
         isa<ConstantAggregateZero>(RHS->getOperand(1)))
-      return new FCmpInst(FCmpInst::FCMP_UNO,
-                          LHS->getOperand(0), RHS->getOperand(0));
+      return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
     
     return 0;
   }
@@ -1404,14 +1346,13 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
   if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
     // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
     if (Op0CC == Op1CC)
-      return new FCmpInst((FCmpInst::Predicate)Op0CC,
-                          Op0LHS, Op0RHS);
+      return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
     if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
     if (Op0CC == FCmpInst::FCMP_FALSE)
-      return ReplaceInstUsesWith(I, RHS);
+      return RHS;
     if (Op1CC == FCmpInst::FCMP_FALSE)
-      return ReplaceInstUsesWith(I, LHS);
+      return LHS;
     bool Op0Ordered;
     bool Op1Ordered;
     unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
@@ -1419,11 +1360,7 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
     if (Op0Ordered == Op1Ordered) {
       // If both are ordered or unordered, return a new fcmp with
       // or'ed predicates.
-      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
-      if (Instruction *I = dyn_cast<Instruction>(RV))
-        return I;
-      // Otherwise, it's a constant boolean value...
-      return ReplaceInstUsesWith(I, RV);
+      return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
     }
   }
   return 0;
@@ -1686,14 +1623,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
 
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
-      if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
-        return Res;
+      if (Value *Res = FoldOrOfICmps(LHS, RHS))
+        return ReplaceInstUsesWith(I, Res);
     
   // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
-        return Res;
+      if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+        return ReplaceInstUsesWith(I, Res);
   
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
@@ -1717,19 +1654,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
           // cast is otherwise not optimizable.  This happens for vector sexts.
           if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
             if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
-              if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
-                InsertNewInstBefore(Res, I);
+              if (Value *Res = FoldOrOfICmps(LHS, RHS))
                 return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-              }
           
           // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
           // cast is otherwise not optimizable.  This happens for vector sexts.
           if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
             if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
-              if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
-                InsertNewInstBefore(Res, I);
+              if (Value *Res = FoldOrOfFCmps(LHS, RHS))
                 return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-              }
         }
       }
   }
@@ -2005,11 +1938,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
           Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
           unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
           bool isSigned = LHS->isSigned() || RHS->isSigned();
-          Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
-          if (Instruction *I = dyn_cast<Instruction>(RV))
-            return I;
-          // Otherwise, it's a constant boolean value.
-          return ReplaceInstUsesWith(I, RV);
+          return ReplaceInstUsesWith(I, 
+                               getICmpValue(isSigned, Code, Op0, Op1, Builder));
         }
       }
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 835d149eab48..a241f169f28a 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -304,29 +304,39 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   switch (II->getIntrinsicID()) {
   default: break;
   case Intrinsic::objectsize: {
-    const Type *ReturnTy = CI.getType();
-    Value *Op1 = II->getOperand(1);
-    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
-    
     // We need target data for just about everything so depend on it.
     if (!TD) break;
     
+    const Type *ReturnTy = CI.getType();
+    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+
     // Get to the real allocated thing and offset as fast as possible.
-    Op1 = Op1->stripPointerCasts();
+    Value *Op1 = II->getOperand(1)->stripPointerCasts();
     
     // If we've stripped down to a single global variable that we
     // can know the size of then just return that.
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
       if (GV->hasDefinitiveInitializer()) {
         Constant *C = GV->getInitializer();
-        uint64_t globalSize = TD->getTypeAllocSize(C->getType());
-        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize));
+        uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
+        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
       } else {
+        // Can't determine size of the GV.
         Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
         return ReplaceInstUsesWith(CI, RetVal);
       }
-    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
-      
+    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+      // Get alloca size.
+      if (AI->getAllocatedType()->isSized()) {
+        uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+        if (AI->isArrayAllocation()) {
+          const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
+          if (!C) break;
+          AllocaSize *= C->getZExtValue();
+        }
+        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
+      }
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {      
       // Only handle constant GEPs here.
       if (CE->getOpcode() != Instruction::GetElementPtr) break;
       GEPOperator *GEP = cast<GEPOperator>(CE);
@@ -361,6 +371,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return ReplaceInstUsesWith(CI, RetVal);
       
     }
+
+    // Do not return "I don't know" here. Later optimization passes could
+    // make it possible to evaluate objectsize to a constant.
+    break;
   }
   case Intrinsic::bswap:
     // bswap(bswap(x)) -> x
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 518af7477d0e..72fd5588d120 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -877,25 +877,26 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   case ICmpInst::ICMP_EQ:
     if (LoOverflow && HiOverflow)
       return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
-    else if (HiOverflow)
+    if (HiOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                           ICmpInst::ICMP_UGE, X, LoBound);
-    else if (LoOverflow)
+    if (LoOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, HiBound);
-    else
-      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+    return ReplaceInstUsesWith(ICI,
+                               InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
+                                               true));
   case ICmpInst::ICMP_NE:
     if (LoOverflow && HiOverflow)
       return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
-    else if (HiOverflow)
+    if (HiOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, LoBound);
-    else if (LoOverflow)
+    if (LoOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                           ICmpInst::ICMP_UGE, X, HiBound);
-    else
-      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, false));
   case ICmpInst::ICMP_ULT:
   case ICmpInst::ICMP_SLT:
     if (LoOverflow == +1)   // Low bound is greater than input range.
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index fba83542cdb4..65f039368704 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -266,6 +266,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
   // and if TD isn't around, we can't handle the mixed case.
   bool isVolatile = FirstLI->isVolatile();
   unsigned LoadAlignment = FirstLI->getAlignment();
+  unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
   
   // We can't sink the load if the loaded value could be modified between the
   // load and the PHI.
@@ -290,6 +291,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
     // the load and the PHI.
     if (LI->isVolatile() != isVolatile ||
         LI->getParent() != PN.getIncomingBlock(i) ||
+        LI->getPointerAddressSpace() != LoadAddrSpace ||
         !isSafeAndProfitableToSinkLoad(LI))
       return 0;
       
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 12827b6a4ef1..5aca9cdc659c 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -597,19 +597,35 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
 
 /// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
 /// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
 static void FindSingleUseMultiplyFactors(Value *V,
-                                         SmallVectorImpl<Value*> &Factors) {
+                                         SmallVectorImpl<Value*> &Factors,
+                                       const SmallVectorImpl<ValueEntry> &Ops,
+                                         bool IsRoot) {
   BinaryOperator *BO;
-  if ((!V->hasOneUse() && !V->use_empty()) ||
+  if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
       !(BO = dyn_cast<BinaryOperator>(V)) ||
       BO->getOpcode() != Instruction::Mul) {
     Factors.push_back(V);
     return;
   }
   
+  // If this value has a single use because it is another input to the add
+  // tree we're reassociating and we dropped its use, it actually has two
+  // uses and we can't factor it.
+  if (!IsRoot) {
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (Ops[i].Op == V) {
+        Factors.push_back(V);
+        return;
+      }
+  }
+  
+  
   // Otherwise, add the LHS and RHS to the list of factors.
-  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
-  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
+  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
+  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
 }
 
 /// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
@@ -753,7 +769,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
     
     // Compute all of the factors of this added value.
     SmallVector<Value*, 8> Factors;
-    FindSingleUseMultiplyFactors(BOp, Factors);
+    FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
     assert(Factors.size() > 1 && "Bad linearize!");
     
     // Add one to FactorOccurrences for each unique factor in this op.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index cde214bac230..86ddeac13ee6 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -17,6 +17,7 @@
 
 #define DEBUG_TYPE "simplify-libcalls"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
@@ -67,496 +68,14 @@ public:
       Context = &CI->getCalledFunction()->getContext();
     return CallOptimizer(CI->getCalledFunction(), CI, B);
   }
-
-  /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
-  Value *CastToCStr(Value *V, IRBuilder<> &B);
-
-  /// EmitStrLen - Emit a call to the strlen function to the builder, for the
-  /// specified pointer.  Ptr is required to be some pointer type, and the
-  /// return value has 'intptr_t' type.
-  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
-
-  /// EmitStrChr - Emit a call to the strchr function to the builder, for the
-  /// specified pointer and character.  Ptr is required to be some pointer type,
-  /// and the return value has 'i8*' type.
-  Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B);
-
-  /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
-  /// specified pointer arguments.
-  Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B);
-  
-  /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
-  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
-                    unsigned Align, IRBuilder<> &B);
-
-  /// EmitMemMove - Emit a call to the memmove function to the builder.  This
-  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemMove(Value *Dst, Value *Src, Value *Len,
-		     unsigned Align, IRBuilder<> &B);
-
-  /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
-  /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-  Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
-
-  /// EmitMemCmp - Emit a call to the memcmp function.
-  Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B);
-
-  /// EmitMemSet - Emit a call to the memset function
-  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);
-
-  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
-  /// (e.g.  'floor').  This function is known to take a single of type matching
-  /// 'Op' and returns one value with the same type.  If 'Op' is a long double,
-  /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f'
-  /// suffix.
-  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
-                              const AttrListPtr &Attrs);
-
-  /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
-  /// is an integer.
-  Value *EmitPutChar(Value *Char, IRBuilder<> &B);
-
-  /// EmitPutS - Emit a call to the puts function.  This assumes that Str is
-  /// some pointer.
-  void EmitPutS(Value *Str, IRBuilder<> &B);
-
-  /// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is
-  /// an i32, and File is a pointer to FILE.
-  void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B);
-
-  /// EmitFPutS - Emit a call to the puts function.  Str is required to be a
-  /// pointer and File is a pointer to FILE.
-  void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B);
-
-  /// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is
-  /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-  void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B);
-
 };
 } // End anonymous namespace.
 
-/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
-Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
-  return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
-}
-
-/// EmitStrLen - Emit a call to the strlen function to the builder, for the
-/// specified pointer.  This always returns an integer value of size intptr_t.
-Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
-                                   Attribute::NoUnwind);
-
-  Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
-                                           TD->getIntPtrType(*Context),
-                                           Type::getInt8PtrTy(*Context),
-                                           NULL);
-  CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
-  if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
-}
-
-/// EmitStrChr - Emit a call to the strchr function to the builder, for the
-/// specified pointer and character.  Ptr is required to be some pointer type,
-/// and the return value has 'i8*' type.
-Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI =
-    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-
-  const Type *I8Ptr = Type::getInt8PtrTy(*Context);
-  const Type *I32Ty = Type::getInt32Ty(*Context);
-  Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
-                                            I8Ptr, I8Ptr, I32Ty, NULL);
-  CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
-                               ConstantInt::get(I32Ty, C), "strchr");
-  if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
-}
-
-/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
-/// specified pointer arguments.
-Value *LibCallOptimization::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-  const Type *I8Ptr = Type::getInt8PtrTy(*Context);
-  Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2),
-                                         I8Ptr, I8Ptr, I8Ptr, NULL);
-  CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
-                               "strcpy");
-  if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
-}
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder.  This always
-/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
-                                       unsigned Align, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  const Type *Ty = Len->getType();
-  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  return B.CreateCall4(MemCpy, Dst, Src, Len,
-                       ConstantInt::get(Type::getInt32Ty(*Context), Align));
-}
-
-/// EmitMemMove - Emit a call to the memmove function to the builder.  This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
-					unsigned Align, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  const Type *Ty = TD->getIntPtrType(*Context);
-  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
-  return B.CreateCall4(MemMove, Dst, Src, Len, A);
-}
-
-/// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
-/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
-                                       Value *Len, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI;
-  AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-
-  Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
-                                         Type::getInt8PtrTy(*Context),
-                                         Type::getInt8PtrTy(*Context),
-                                         Type::getInt32Ty(*Context),
-                                         TD->getIntPtrType(*Context),
-                                         NULL);
-  CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
-
-  if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
-}
-
-/// EmitMemCmp - Emit a call to the memcmp function.
-Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
-                                       Value *Len, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
-  AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
-                                   Attribute::NoUnwind);
-
-  Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
-                                         Type::getInt32Ty(*Context),
-                                         Type::getInt8PtrTy(*Context),
-                                         Type::getInt8PtrTy(*Context),
-                                         TD->getIntPtrType(*Context), NULL);
-  CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
-                               Len, "memcmp");
-
-  if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
-}
-
-/// EmitMemSet - Emit a call to the memset function
-Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
-                                       Value *Len, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[1];
- Tys[0] = Len->getType();
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
- return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
-}
-
-/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
-/// 'floor').  This function is known to take a single of type matching 'Op' and
-/// returns one value with the same type.  If 'Op' is a long double, 'l' is
-/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
-                                                 IRBuilder<> &B,
-                                                 const AttrListPtr &Attrs) {
-  char NameBuffer[20];
-  if (!Op->getType()->isDoubleTy()) {
-    // If we need to add a suffix, copy into NameBuffer.
-    unsigned NameLen = strlen(Name);
-    assert(NameLen < sizeof(NameBuffer)-2);
-    memcpy(NameBuffer, Name, NameLen);
-    if (Op->getType()->isFloatTy())
-      NameBuffer[NameLen] = 'f';  // floorf
-    else
-      NameBuffer[NameLen] = 'l';  // floorl
-    NameBuffer[NameLen+1] = 0;
-    Name = NameBuffer;
-  }
-
-  Module *M = Caller->getParent();
-  Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
-                                         Op->getType(), NULL);
-  CallInst *CI = B.CreateCall(Callee, Op, Name);
-  CI->setAttributes(Attrs);
-  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
-}
-
-/// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
-/// is an integer.
-Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
-                                          Type::getInt32Ty(*Context), NULL);
-  CallInst *CI = B.CreateCall(PutChar,
-                              B.CreateIntCast(Char,
-                              Type::getInt32Ty(*Context),
-                              /*isSigned*/true,
-                              "chari"),
-                              "putchar");
-
-  if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
-}
-
-/// EmitPutS - Emit a call to the puts function.  This assumes that Str is
-/// some pointer.
-void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-
-  Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
-                                       Type::getInt32Ty(*Context),
-                                       Type::getInt8PtrTy(*Context),
-                                       NULL);
-  CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
-  if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-}
-
-/// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is
-/// an integer and File is a pointer to FILE.
-void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-  Constant *F;
-  if (File->getType()->isPointerTy())
-    F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
-                               Type::getInt32Ty(*Context),
-                               Type::getInt32Ty(*Context), File->getType(),
-                               NULL);
-  else
-    F = M->getOrInsertFunction("fputc",
-                               Type::getInt32Ty(*Context),
-                               Type::getInt32Ty(*Context),
-                               File->getType(), NULL);
-  Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
-                         "chari");
-  CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
-
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
-    CI->setCallingConv(Fn->getCallingConv());
-}
-
-/// EmitFPutS - Emit a call to the puts function.  Str is required to be a
-/// pointer and File is a pointer to FILE.
-void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
-  AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-  Constant *F;
-  if (File->getType()->isPointerTy())
-    F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
-                               Type::getInt32Ty(*Context),
-                               Type::getInt8PtrTy(*Context),
-                               File->getType(), NULL);
-  else
-    F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context),
-                               Type::getInt8PtrTy(*Context),
-                               File->getType(), NULL);
-  CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
-
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
-    CI->setCallingConv(Fn->getCallingConv());
-}
-
-/// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is
-/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
-                                     IRBuilder<> &B) {
-  Module *M = Caller->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
-  AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
-  AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-  Constant *F;
-  if (File->getType()->isPointerTy())
-    F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
-                               TD->getIntPtrType(*Context),
-                               Type::getInt8PtrTy(*Context),
-                               TD->getIntPtrType(*Context),
-                               TD->getIntPtrType(*Context),
-                               File->getType(), NULL);
-  else
-    F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context),
-                               Type::getInt8PtrTy(*Context),
-                               TD->getIntPtrType(*Context),
-                               TD->getIntPtrType(*Context),
-                               File->getType(), NULL);
-  CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
-                        ConstantInt::get(TD->getIntPtrType(*Context), 1), File);
-
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
-    CI->setCallingConv(Fn->getCallingConv());
-}
 
 //===----------------------------------------------------------------------===//
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-/// GetStringLengthH - If we can compute the length of the string pointed to by
-/// the specified pointer, return 'len+1'.  If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
-  // Look through noop bitcast instructions.
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
-    return GetStringLengthH(BCI->getOperand(0), PHIs);
-
-  // If this is a PHI node, there are two cases: either we have already seen it
-  // or we haven't.
-  if (PHINode *PN = dyn_cast<PHINode>(V)) {
-    if (!PHIs.insert(PN))
-      return ~0ULL;  // already in the set.
-
-    // If it was new, see if all the input strings are the same length.
-    uint64_t LenSoFar = ~0ULL;
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
-      if (Len == 0) return 0; // Unknown length -> unknown.
-
-      if (Len == ~0ULL) continue;
-
-      if (Len != LenSoFar && LenSoFar != ~0ULL)
-        return 0;    // Disagree -> unknown.
-      LenSoFar = Len;
-    }
-
-    // Success, all agree.
-    return LenSoFar;
-  }
-
-  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
-  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
-    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
-    if (Len1 == 0) return 0;
-    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
-    if (Len2 == 0) return 0;
-    if (Len1 == ~0ULL) return Len2;
-    if (Len2 == ~0ULL) return Len1;
-    if (Len1 != Len2) return 0;
-    return Len1;
-  }
-
-  // If the value is not a GEP instruction nor a constant expression with a
-  // GEP instruction, then return unknown.
-  User *GEP = 0;
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
-    GEP = GEPI;
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (CE->getOpcode() != Instruction::GetElementPtr)
-      return 0;
-    GEP = CE;
-  } else {
-    return 0;
-  }
-
-  // Make sure the GEP has exactly three arguments.
-  if (GEP->getNumOperands() != 3)
-    return 0;
-
-  // Check to make sure that the first operand of the GEP is an integer and
-  // has value 0 so that we are sure we're indexing into the initializer.
-  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
-    if (!Idx->isZero())
-      return 0;
-  } else
-    return 0;
-
-  // If the second index isn't a ConstantInt, then this is a variable index
-  // into the array.  If this occurs, we can't say anything meaningful about
-  // the string.
-  uint64_t StartIdx = 0;
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
-    StartIdx = CI->getZExtValue();
-  else
-    return 0;
-
-  // The GEP instruction, constant or instruction, must reference a global
-  // variable that is a constant and is initialized. The referenced constant
-  // initializer is the array that we'll use for optimization.
-  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
-  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
-      GV->mayBeOverridden())
-    return 0;
-  Constant *GlobalInit = GV->getInitializer();
-
-  // Handle the ConstantAggregateZero case, which is a degenerate case. The
-  // initializer is constant zero so the length of the string must be zero.
-  if (isa<ConstantAggregateZero>(GlobalInit))
-    return 1;  // Len = 0 offset by 1.
-
-  // Must be a Constant Array
-  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
-    return false;
-
-  // Get the number of elements in the array
-  uint64_t NumElts = Array->getType()->getNumElements();
-
-  // Traverse the constant array from StartIdx (derived above) which is
-  // the place the GEP refers to in the array.
-  for (unsigned i = StartIdx; i != NumElts; ++i) {
-    Constant *Elt = Array->getOperand(i);
-    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
-    if (!CI) // This array isn't suitable, non-int initializer.
-      return 0;
-    if (CI->isZero())
-      return i-StartIdx+1; // We found end of string, success!
-  }
-
-  return 0; // The array isn't null terminated, conservatively return 'unknown'.
-}
-
-/// GetStringLength - If we can compute the length of the string pointed to by
-/// the specified pointer, return 'len+1'.  If we can't, return 0.
-static uint64_t GetStringLength(Value *V) {
-  if (!V->getType()->isPointerTy()) return 0;
-
-  SmallPtrSet<PHINode*, 32> PHIs;
-  uint64_t Len = GetStringLengthH(V, PHIs);
-  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
-  // an empty string as a length.
-  return Len == ~0ULL ? 1 : Len;
-}
-
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
 /// value is equal or not-equal to zero.
 static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
@@ -613,7 +132,7 @@ struct StrCatOpt : public LibCallOptimization {
   void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
     // We need to find the end of the destination string.  That's where the
     // memory is to be moved to. We just generate a call to strlen.
-    Value *DstLen = EmitStrLen(Dst, B);
+    Value *DstLen = EmitStrLen(Dst, B, TD);
 
     // Now that we have the destination's length, we must index into the
     // destination's pointer to get the actual memcpy destination (end of
@@ -623,7 +142,7 @@ struct StrCatOpt : public LibCallOptimization {
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(CpyDst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD);
   }
 };
 
@@ -701,7 +220,8 @@ struct StrChrOpt : public LibCallOptimization {
         return 0;
 
       return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
-                        ConstantInt::get(TD->getIntPtrType(*Context), Len), B);
+                        ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                        B, TD);
     }
 
     // Otherwise, the character is a constant, see if the first argument is
@@ -772,7 +292,7 @@ struct StrCmpOpt : public LibCallOptimization {
 
       return EmitMemCmp(Str1P, Str2P,
                         ConstantInt::get(TD->getIntPtrType(*Context),
-                        std::min(Len1, Len2)), B);
+                        std::min(Len1, Len2)), B, TD);
     }
 
     return 0;
@@ -852,7 +372,7 @@ struct StrCpyOpt : public LibCallOptimization {
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(Dst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
     return Dst;
   }
 };
@@ -881,7 +401,7 @@ struct StrNCpyOpt : public LibCallOptimization {
     if (SrcLen == 0) {
       // strncpy(x, "", y) -> memset(x, '\0', y, 1)
       EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
-		 B);
+		             B, TD);
       return Dst;
     }
 
@@ -901,7 +421,7 @@ struct StrNCpyOpt : public LibCallOptimization {
 
     // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
     EmitMemCpy(Dst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
 
     return Dst;
   }
@@ -993,7 +513,7 @@ struct StrStrOpt : public LibCallOptimization {
 
     // fold strstr(x, "y") -> strchr(x, 'y').
     if (HasStr2 && ToFindStr.size() == 1)
-      return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B),
+      return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
                              CI->getType());
     return 0;
   }
@@ -1061,7 +581,8 @@ struct MemCpyOpt : public LibCallOptimization {
       return 0;
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
-    EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+    EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
+               CI->getOperand(3), 1, B, TD);
     return CI->getOperand(1);
   }
 };
@@ -1082,7 +603,8 @@ struct MemMoveOpt : public LibCallOptimization {
       return 0;
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
-    EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+    EmitMemMove(CI->getOperand(1), CI->getOperand(2),
+                CI->getOperand(3), 1, B, TD);
     return CI->getOperand(1);
   }
 };
@@ -1105,7 +627,7 @@ struct MemSetOpt : public LibCallOptimization {
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
     Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
 				 false);
-    EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B);
+    EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
     return CI->getOperand(1);
   }
 };
@@ -1130,11 +652,14 @@ struct MemCpyChkOpt : public LibCallOptimization {
         FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
+    ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+    if (!ObjSizeCI)
       return 0;
-    if (SizeCI->isAllOnesValue()) {
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (ObjSizeCI->isAllOnesValue() ||
+        (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) {
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
+                 CI->getOperand(3), 1, B, TD);
       return CI->getOperand(1);
     }
 
@@ -1158,13 +683,15 @@ struct MemSetChkOpt : public LibCallOptimization {
         FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
+    ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+    if (!ObjSizeCI)
       return 0;
-    if (SizeCI->isAllOnesValue()) {
+    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (ObjSizeCI->isAllOnesValue() ||
+        (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) {
       Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
 				   false);
-      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B);
+      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
       return CI->getOperand(1);
     }
 
@@ -1188,12 +715,14 @@ struct MemMoveChkOpt : public LibCallOptimization {
         FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
+    ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+    if (!ObjSizeCI)
       return 0;
-    if (SizeCI->isAllOnesValue()) {
+    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (ObjSizeCI->isAllOnesValue() ||
+        (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) {
       EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
-		  1, B);
+		              1, B, TD);
       return CI->getOperand(1);
     }
 
@@ -1209,8 +738,8 @@ struct StrCpyChkOpt : public LibCallOptimization {
         !FT->getParamType(1)->isPointerTy())
       return 0;
 
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeCI)
+    ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
+    if (!ObjSizeCI)
       return 0;
     
     // If a) we don't have any length information, or b) we know this will
@@ -1218,9 +747,9 @@ struct StrCpyChkOpt : public LibCallOptimization {
     // strcpy_chk call which may fail at runtime if the size is too long.
     // TODO: It might be nice to get a maximum length out of the possible
     // string lengths for varying.
-    if (SizeCI->isAllOnesValue() ||
-        SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2)))
-      return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B);
+    if (ObjSizeCI->isAllOnesValue() ||
+        ObjSizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2)))
+      return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
 
     return 0;
   }
@@ -1512,7 +1041,7 @@ struct PrintFOpt : public LibCallOptimization {
     // in case there is an error writing to stdout.
     if (FormatStr.size() == 1) {
       Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
-                                                FormatStr[0]), B);
+                                                FormatStr[0]), B, TD);
       if (CI->use_empty()) return CI;
       return B.CreateIntCast(Res, CI->getType(), true);
     }
@@ -1526,7 +1055,7 @@ struct PrintFOpt : public LibCallOptimization {
       Constant *C = ConstantArray::get(*Context, FormatStr, true);
       C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
                              GlobalVariable::InternalLinkage, C, "str");
-      EmitPutS(C, B);
+      EmitPutS(C, B, TD);
       return CI->use_empty() ? (Value*)CI :
                     ConstantInt::get(CI->getType(), FormatStr.size()+1);
     }
@@ -1535,7 +1064,7 @@ struct PrintFOpt : public LibCallOptimization {
     // printf("%c", chr) --> putchar(*(i8*)dst)
     if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
         CI->getOperand(2)->getType()->isIntegerTy()) {
-      Value *Res = EmitPutChar(CI->getOperand(2), B);
+      Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
 
       if (CI->use_empty()) return CI;
       return B.CreateIntCast(Res, CI->getType(), true);
@@ -1545,7 +1074,7 @@ struct PrintFOpt : public LibCallOptimization {
     if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
         CI->getOperand(2)->getType()->isPointerTy() &&
         CI->use_empty()) {
-      EmitPutS(CI->getOperand(2), B);
+      EmitPutS(CI->getOperand(2), B, TD);
       return CI;
     }
     return 0;
@@ -1582,8 +1111,8 @@ struct SPrintFOpt : public LibCallOptimization {
 
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
-          ConstantInt::get
-                 (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+                 ConstantInt::get(TD->getIntPtrType(*Context),
+                 FormatStr.size()+1), 1, B, TD);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
 
@@ -1614,11 +1143,11 @@ struct SPrintFOpt : public LibCallOptimization {
       // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
       if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
 
-      Value *Len = EmitStrLen(CI->getOperand(3), B);
+      Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
       Value *IncLen = B.CreateAdd(Len,
                                   ConstantInt::get(Len->getType(), 1),
                                   "leninc");
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD);
 
       // The sprintf result is the unincremented number of bytes in the string.
       return B.CreateIntCast(Len, CI->getType(), false);
@@ -1654,7 +1183,7 @@ struct FWriteOpt : public LibCallOptimization {
     // If this is writing one byte, turn it into fputc.
     if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
       Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
-      EmitFPutC(Char, CI->getOperand(4), B);
+      EmitFPutC(Char, CI->getOperand(4), B, TD);
       return ConstantInt::get(CI->getType(), 1);
     }
 
@@ -1682,7 +1211,7 @@ struct FPutsOpt : public LibCallOptimization {
     if (!Len) return 0;
     EmitFWrite(CI->getOperand(1),
                ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
-               CI->getOperand(2), B);
+               CI->getOperand(2), B, TD);
     return CI;  // Known to have no uses (see above).
   }
 };
@@ -1716,7 +1245,7 @@ struct FPrintFOpt : public LibCallOptimization {
       EmitFWrite(CI->getOperand(2),
                  ConstantInt::get(TD->getIntPtrType(*Context),
                                   FormatStr.size()),
-                 CI->getOperand(1), B);
+                 CI->getOperand(1), B, TD);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
 
@@ -1729,7 +1258,7 @@ struct FPrintFOpt : public LibCallOptimization {
     if (FormatStr[1] == 'c') {
       // fprintf(F, "%c", chr) --> *(i8*)dst = chr
       if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
-      EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
+      EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
       return ConstantInt::get(CI->getType(), 1);
     }
 
@@ -1737,7 +1266,7 @@ struct FPrintFOpt : public LibCallOptimization {
       // fprintf(F, "%s", str) -> fputs(str, F)
       if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
         return 0;
-      EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
+      EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
       return CI;
     }
     return 0;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..2ea4bb6dc98c
--- /dev/null
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,324 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Bitcast V to i8* through the builder, naming the result "cstr".
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+  return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to strlen(Ptr) to the builder; Ptr is bitcast to
+/// i8*.  The declared return type of strlen is the intptr_t obtained from TD.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+  BasicBlock *BB = B.GetInsertBlock();
+  Module *M = BB->getParent()->getParent();
+  const Type *SizeTy = TD->getIntPtrType(BB->getContext());
+
+  // Argument 1 is nocapture; index ~0u holds the function attributes.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(1, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind)
+  };
+  AttrListPtr AttrList = AttrListPtr::get(Attrs, 2);
+  Constant *Callee = M->getOrInsertFunction("strlen", AttrList, SizeTy,
+                                            B.getInt8PtrTy(), NULL);
+  CallInst *Call = B.CreateCall(Callee, CastToCStr(Ptr, B), "strlen");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitStrChr - Emit a call to strchr(Ptr, C) to the builder.  Ptr must be some
+/// pointer type and is bitcast to i8*, C is passed as an i32 constant, and the
+/// call is declared to return i8*.  TD is not consulted by this function.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                        const TargetData *TD) {
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  const Type *IntTy = B.getInt32Ty();
+  AttributeWithIndex FnAttrs =
+    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Constant *Callee = M->getOrInsertFunction("strchr",
+                                            AttrListPtr::get(&FnAttrs, 1),
+                                            CharPtrTy, CharPtrTy, IntTy, NULL);
+  CallInst *Call = B.CreateCall2(Callee, CastToCStr(Ptr, B),
+                                 ConstantInt::get(IntTy, C), "strchr");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitStrCpy - Emit a call to strcpy(Dst, Src) to the builder.  Both pointers
+/// are bitcast to i8*, which is also the call's type.  TD is not consulted.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                        const TargetData *TD) {
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  AttributeWithIndex Attrs[2];
+  Attrs[0] = AttributeWithIndex::get(2, Attribute::NoCapture);  // Src
+  Attrs[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); // function
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Value *Callee = M->getOrInsertFunction("strcpy", AttrListPtr::get(Attrs, 2),
+                                         CharPtrTy, CharPtrTy, CharPtrTy, NULL);
+  CallInst *Call = B.CreateCall2(Callee, CastToCStr(Dst, B),
+                                 CastToCStr(Src, B), "strcpy");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitMemCpy - Emit a call to the llvm.memcpy intrinsic, overloaded on the
+/// type of Len.  Dst and Src are bitcast to i8*; Align is passed as an i32.
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+                        unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+  const Type *LenTy = Len->getType();
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Value *Callee = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &LenTy, 1);
+  Value *DstI8 = CastToCStr(Dst, B);
+  Value *SrcI8 = CastToCStr(Src, B);
+  Value *AlignV = ConstantInt::get(B.getInt32Ty(), Align);
+  return B.CreateCall4(Callee, DstI8, SrcI8, Len, AlignV);
+}
+
+/// EmitMemMove - Emit a call to the llvm.memmove intrinsic to the builder,
+/// using the overload for the 'intptr_t' from TD.  Len must have that type.
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
+                         unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  const Type *Ty = TD->getIntPtrType(Context);  // overload type of the length
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  Dst = CastToCStr(Dst, B);  // both pointer operands are passed as i8*
+  Src = CastToCStr(Src, B);
+  Value *A = ConstantInt::get(B.getInt32Ty(), Align);  // alignment as i32
+  return B.CreateCall4(MemMove, Dst, Src, Len, A);
+}
+
+/// EmitMemChr - Emit a call to the memchr function.  Ptr is bitcast to i8*;
+/// Val is assumed to be an i32 value and Len an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  BasicBlock *BB = B.GetInsertBlock();
+  Module *M = BB->getParent()->getParent();
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  const Type *SizeTy = TD->getIntPtrType(BB->getContext());
+  // Index ~0u holds the function attributes: readonly and nounwind.
+  AttributeWithIndex FnAttrs =
+    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+  Value *Callee = M->getOrInsertFunction("memchr",
+                                         AttrListPtr::get(&FnAttrs, 1),
+                                         CharPtrTy, CharPtrTy, B.getInt32Ty(),
+                                         SizeTy, NULL);
+  CallInst *Call = B.CreateCall3(Callee, CastToCStr(Ptr, B), Val, Len,
+                                 "memchr");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitMemCmp - Emit a call to memcmp, casting both pointers to i8*.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  BasicBlock *BB = B.GetInsertBlock();
+  Module *M = BB->getParent()->getParent();
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  const Type *SizeTy = TD->getIntPtrType(BB->getContext());
+
+  // Arguments 1 and 2 are nocapture; index ~0u holds the function attributes.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(1, Attribute::NoCapture),
+    AttributeWithIndex::get(2, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind)
+  };
+  Value *Callee = M->getOrInsertFunction("memcmp", AttrListPtr::get(Attrs, 3),
+                                         B.getInt32Ty(), CharPtrTy, CharPtrTy,
+                                         SizeTy, NULL);
+  CallInst *Call = B.CreateCall3(Callee, CastToCStr(Ptr1, B),
+                                 CastToCStr(Ptr2, B), Len, "memcmp");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+
+  return Call;
+}
+
+/// EmitMemSet - Emit a call to the llvm.memset intrinsic with alignment 1.
+Value *llvm::EmitMemSet(Value *Dst, Value *Val,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // Pick the intrinsic overload matching the type of Len.
+  const Type *LenTy = Len->getType();
+  Value *MemSet = Intrinsic::getDeclaration(M, Intrinsic::memset, &LenTy, 1);
+  // Dst is cast to i8*; Val and Len are passed through unchanged.
+  Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
+  return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor') that takes one argument of Op's type and returns that same type.
+/// A double 'Op' uses Name unchanged, a float 'Op' gets an 'f' suffix, and any
+/// other type (e.g. long double) gets an 'l' suffix.  Attrs is set on the call.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+                                  IRBuilder<> &B, const AttrListPtr &Attrs) {
+  const Type *OpTy = Op->getType();
+  char NameBuffer[20];
+  if (!OpTy->isDoubleTy()) {
+    // A suffix is required, so build the suffixed name in NameBuffer.  The
+    // assert leaves room for the suffix character and the terminating nul.
+    unsigned NameLen = strlen(Name);
+    assert(NameLen < sizeof(NameBuffer)-2);
+    memcpy(NameBuffer, Name, NameLen);
+    NameBuffer[NameLen] = OpTy->isFloatTy() ? 'f' : 'l';  // floorf / floorl
+    NameBuffer[NameLen+1] = 0;
+    Name = NameBuffer;
+  }
+
+  // Declare (or reuse) 'OpTy Name(OpTy)' in the module being built.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Value *Callee = M->getOrInsertFunction(Name, OpTy, OpTy, NULL);
+
+  CallInst *Call = B.CreateCall(Callee, Op, Name);
+  Call->setAttributes(Attrs);
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+
+  return Call;
+}
+
+/// EmitPutChar - Emit a call to the putchar function.  Char must be an integer;
+/// it is converted to i32 with a signed cast.  TD is not consulted here.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+  const Type *IntTy = B.getInt32Ty();
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+
+  // Declare (or reuse) 'i32 putchar(i32)'.
+  Value *Callee = M->getOrInsertFunction("putchar", IntTy, IntTy, NULL);
+
+  // Widen or narrow the character to i32, treating it as signed.
+  Value *CharI32 = B.CreateIntCast(Char, IntTy, /*isSigned*/true, "chari");
+  CallInst *Call = B.CreateCall(Callee, CharI32, "putchar");
+
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitPutS - Emit a call to the puts function.  Str must be some pointer; it
+/// is bitcast to i8*.  The call's result is discarded; TD is not consulted.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+  // Argument 1 is nocapture; index ~0u holds the function attributes.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(1, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+  };
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Value *Callee = M->getOrInsertFunction("puts", AttrListPtr::get(Attrs, 2),
+                                         B.getInt32Ty(), B.getInt8PtrTy(),
+                                         NULL);
+
+  CallInst *Call = B.CreateCall(Callee, CastToCStr(Str, B), "puts");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutC - Emit a call to the fputc function.  Char must be an integer (it
+/// is cast to i32 as signed) and File is expected to be a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                     const TargetData *TD) {
+  const Type *IntTy = B.getInt32Ty();
+  const Type *FileTy = File->getType();
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+
+  // nocapture on argument 2 (File) and nounwind are attached only when File
+  // has pointer type; otherwise fputc is declared without attributes.
+  Constant *Callee;
+  if (FileTy->isPointerTy()) {
+    AttributeWithIndex Attrs[] = {
+      AttributeWithIndex::get(2, Attribute::NoCapture),
+      AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+    };
+    Callee = M->getOrInsertFunction("fputc", AttrListPtr::get(Attrs, 2),
+                                    IntTy, IntTy, FileTy, NULL);
+  } else {
+    Callee = M->getOrInsertFunction("fputc", IntTy, IntTy, FileTy, NULL);
+  }
+  Value *CharI32 = B.CreateIntCast(Char, IntTy, /*isSigned*/true, "chari");
+  CallInst *Call = B.CreateCall2(Callee, CharI32, File, "fputc");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function.  Str is required to be a
+/// pointer (it is bitcast to i8*) and File is expected to be a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+                     const TargetData *TD) {
+  const Type *IntTy = B.getInt32Ty();
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  const Type *FileTy = File->getType();
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Constant *Callee;
+  if (FileTy->isPointerTy()) {
+    AttributeWithIndex Attrs[] = {
+      AttributeWithIndex::get(1, Attribute::NoCapture),
+      AttributeWithIndex::get(2, Attribute::NoCapture),
+      AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+    };
+    Callee = M->getOrInsertFunction("fputs", AttrListPtr::get(Attrs, 3),
+                                    IntTy, CharPtrTy, FileTy, NULL);
+  } else {
+    Callee = M->getOrInsertFunction("fputs", IntTy, CharPtrTy, FileTy, NULL);
+  }
+  CallInst *Call = B.CreateCall2(Callee, CastToCStr(Str, B), File, "fputs");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to fwrite(Ptr, Size, 1, File).  Ptr is bitcast to
+/// i8*, Size is assumed to be an 'intptr_t', and File a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+                      IRBuilder<> &B, const TargetData *TD) {
+  BasicBlock *BB = B.GetInsertBlock();
+  Module *M = BB->getParent()->getParent();
+  const Type *SizeTy = TD->getIntPtrType(BB->getContext());
+  const Type *CharPtrTy = B.getInt8PtrTy();
+  const Type *FileTy = File->getType();
+  // nocapture on arguments 1 and 4 plus nounwind are attached only when File
+  // has pointer type; otherwise fwrite is declared without attributes.
+  Constant *Callee;
+  if (FileTy->isPointerTy()) {
+    AttributeWithIndex Attrs[] = {
+      AttributeWithIndex::get(1, Attribute::NoCapture),
+      AttributeWithIndex::get(4, Attribute::NoCapture),
+      AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+    };
+    Callee = M->getOrInsertFunction("fwrite", AttrListPtr::get(Attrs, 3),
+                                    SizeTy, CharPtrTy, SizeTy, SizeTy, FileTy,
+                                    NULL);
+  } else {
+    Callee = M->getOrInsertFunction("fwrite", SizeTy, CharPtrTy, SizeTy,
+                                    SizeTy, FileTy, NULL);
+  }
+  Value *One = ConstantInt::get(SizeTy, 1);
+  CallInst *Call = B.CreateCall4(Callee, CastToCStr(Ptr, B), Size, One, File);
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 93577b47ff88..dec227acafd2 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils
   BasicBlockUtils.cpp
   BasicInliner.cpp
   BreakCriticalEdges.cpp
+  BuildLibCalls.cpp
   CloneFunction.cpp
   CloneLoop.cpp
   CloneModule.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 194a6d4d8c06..549977c2342e 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -1818,7 +1818,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
 
   // Handle some degenerate cases first
   if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
-    return UndefValue::get(ResultTy);
+    return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
 
   // No compile-time operations on this type yet.
   if (C1->getType()->isPPC_FP128Ty())
@@ -2070,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
     if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
       Constant *CE2Op0 = CE2->getOperand(0);
       if (CE2->getOpcode() == Instruction::BitCast &&
-          CE2->getType()->isVectorTy()==CE2Op0->getType()->isVectorTy()) {
+          CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
         Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
         return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
       }
@@ -2078,8 +2078,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
 
     // If the left hand side is an extension, try eliminating it.
     if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
-      if (CE1->getOpcode() == Instruction::SExt ||
-          CE1->getOpcode() == Instruction::ZExt) {
+      if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+          (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
         Constant *CE1Op0 = CE1->getOperand(0);
         Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
         if (CE1Inverse == CE1Op0) {
@@ -2097,27 +2097,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
       // If C2 is a constant expr and C1 isn't, flip them around and fold the
       // other way if possible.
       // Also, if C1 is null and C2 isn't, flip them around.
-      switch (pred) {
-      case ICmpInst::ICMP_EQ:
-      case ICmpInst::ICMP_NE:
-        // No change of predicate required.
-        return ConstantExpr::getICmp(pred, C2, C1);
-
-      case ICmpInst::ICMP_ULT:
-      case ICmpInst::ICMP_SLT:
-      case ICmpInst::ICMP_UGT:
-      case ICmpInst::ICMP_SGT:
-      case ICmpInst::ICMP_ULE:
-      case ICmpInst::ICMP_SLE:
-      case ICmpInst::ICMP_UGE:
-      case ICmpInst::ICMP_SGE:
-        // Change the predicate as necessary to swap the operands.
-        pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
-        return ConstantExpr::getICmp(pred, C2, C1);
-
-      default:  // These predicates cannot be flopped around.
-        break;
-      }
+      pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+      return ConstantExpr::getICmp(pred, C2, C1);
     }
   }
   return 0;
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 85bbe4ac9b00..9887f28821ca 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -246,6 +246,11 @@ public:
       MDNode *N = &(*MDNodeSet.begin());
       N->destroy();
     }
+    // Destroy MDStrings.
+    for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
+           E = MDStringCache.end(); I != E; ++I) {
+      delete I->second;
+    }
   }
 };
 
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
index bc5e77d22de5..4395ecfda05b 100644
--- a/lib/VMCore/Makefile
+++ b/lib/VMCore/Makefile
@@ -30,5 +30,5 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp
 	    changed significantly. )
 
 install-local:: $(GENFILE)
-	$(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen
-	$(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen
+	$(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
+	$(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
diff --git a/runtime/Makefile b/runtime/Makefile
index 1e1045142583..f9a40540a0f8 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -20,6 +20,10 @@ ifeq ($(ARCH), Sparc)
 PARALLEL_DIRS := $(filter-out libprofile, $(PARALLEL_DIRS))
 endif
 
+ifeq ($(OS), Cygwin)
+PARALLEL_DIRS := $(filter-out libprofile, $(PARALLEL_DIRS))
+endif
+
 endif
 
 include $(LEVEL)/Makefile.common
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index 57caac7cb97d..ef1c9ab722c8 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,4 +1,4 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitwriter.cmxa %s -o %t
+(* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t
  * RUN: ./%t %t.bc
  * RUN: llvm-dis < %t.bc | grep caml_int_ty
  *)
@@ -10,9 +10,37 @@ let context = Llvm.global_context ()
 
 let test x = if not x then exit 1 else ()
 
+(* Read the whole of file [name] in binary mode and return it as a string. *)
+let read_file name =
+  let ic = open_in_bin name in
+  let len = in_channel_length ic in
+  let buf = String.create len in
+
+  (* [really_input] reads exactly [len] chars; plain [input] may stop short. *)
+  really_input ic buf 0 len;
+  close_in ic;
+  buf
+
+(* Bitcode of [m] as written to a temp-file channel; the file is then removed. *)
+let temp_bitcode ?unbuffered m =
+  let temp_name, temp_oc = Filename.open_temp_file ~mode:[Open_binary] "" "" in
+
+  test (Llvm_bitwriter.output_bitcode ?unbuffered temp_oc m);
+  flush temp_oc;
+
+  let temp_buf = read_file temp_name in
+  close_out temp_oc;
+  Sys.remove temp_name;
+  temp_buf
+
 let _ =
   let m = Llvm.create_module context "ocaml_test_module" in
   
   ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m);
-  
-  test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1))
+
+  test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1));
+  let file_buf = read_file Sys.argv.(1) in
+
+  test (file_buf = temp_bitcode m);
+  test (file_buf = temp_bitcode ~unbuffered:false m);
+  test (file_buf = temp_bitcode ~unbuffered:true m)
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
index 1b488c5b021f..f28eff28da75 100644
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ b/test/Bindings/Ocaml/scalar_opts.ml
@@ -37,11 +37,28 @@ let test_transforms () =
   
   ignore (PassManager.create_function m
            ++ TargetData.add td
-           ++ add_instruction_combining
+           ++ add_constant_propagation
+					 ++ add_sccp
+           ++ add_dead_store_elimination
+           ++ add_aggressive_dce
+           ++ add_scalar_repl_aggregation
+           ++ add_ind_var_simplification
+           ++ add_instruction_combination
+           ++ add_licm
+           ++ add_loop_unswitch
+           ++ add_loop_unroll
+           ++ add_loop_rotation
+           ++ add_loop_index_split
+           ++ add_memory_to_register_promotion
+           ++ add_memory_to_register_demotion
            ++ add_reassociation
-           ++ add_gvn
+           ++ add_jump_threading
            ++ add_cfg_simplification
-           ++ add_constant_propagation
+           ++ add_tail_call_elimination
+           ++ add_gvn
+           ++ add_memcpy_opt
+           ++ add_loop_deletion
+           ++ add_lib_call_simplification
            ++ PassManager.initialize
            ++ PassManager.run_function fn
            ++ PassManager.finalize
diff --git a/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
new file mode 100644
index 000000000000..f7adf73263ff
--- /dev/null
+++ b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi
+
+define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) {
+  %1 = load i32* undef                            ; <i32> [#uses=1]
+  %2 = sub i32 %1, 48                             ; <i32> [#uses=1]
+  br i1 undef, label %stack_overflow, label %no_overflow
+
+stack_overflow:                                   ; preds = %0
+  unreachable
+
+no_overflow:                                      ; preds = %0
+  %frame = inttoptr i32 %2 to [17 x i32]*         ; <[17 x i32]*> [#uses=4]
+  %3 = load i32* undef                            ; <i32> [#uses=1]
+  %4 = load i32* null                             ; <i32> [#uses=1]
+  %5 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
+  %6 = bitcast i32* %5 to [8 x i8]**              ; <[8 x i8]**> [#uses=1]
+  %7 = load [8 x i8]** %6                         ; <[8 x i8]*> [#uses=1]
+  %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1]
+  %9 = load i32* %8                               ; <i32> [#uses=1]
+  br i1 undef, label %bci_13, label %bci_4
+
+bci_13:                                           ; preds = %no_overflow
+  br i1 undef, label %bci_30, label %bci_21
+
+bci_30:                                           ; preds = %bci_13
+  br i1 undef, label %bci_46, label %bci_35
+
+bci_46:                                           ; preds = %bci_30
+  %10 = sub i32 %4, %3                            ; <i32> [#uses=1]
+  %11 = load [8 x i8]** null                      ; <[8 x i8]*> [#uses=1]
+  %callee = bitcast [8 x i8]* %11 to [84 x i8]*   ; <[84 x i8]*> [#uses=1]
+  %12 = bitcast i8* undef to i32*                 ; <i32*> [#uses=1]
+  %base_pc7 = load i32* %12                       ; <i32> [#uses=2]
+  %13 = add i32 %base_pc7, 0                      ; <i32> [#uses=1]
+  %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1]
+  %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1]
+  %15 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1]
+  %16 = ptrtoint i32* %15 to i32                  ; <i32> [#uses=1]
+  %stack_pointer_addr9 = bitcast i8* undef to i32* ; <i32*> [#uses=1]
+  store i32 %16, i32* %stack_pointer_addr9
+  %17 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1]
+  store i32 %9, i32* %17
+  store i32 %10, i32* undef
+  store [84 x i8]* %method, [84 x i8]** undef
+  %18 = add i32 %base_pc, 20                      ; <i32> [#uses=1]
+  store i32 %18, i32* undef
+  store [8 x i8]* %7, [8 x i8]** undef
+  call void %entry_point([84 x i8]* %callee, i32 %base_pc7, [788 x i8]* %thread)
+  br i1 undef, label %no_exception, label %exception
+
+exception:                                        ; preds = %bci_46
+  ret void
+
+no_exception:                                     ; preds = %bci_46
+  ret void
+
+bci_35:                                           ; preds = %bci_30
+  ret void
+
+bci_21:                                           ; preds = %bci_13
+  ret void
+
+bci_4:                                            ; preds = %no_overflow
+  ret void
+}
diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
new file mode 100644
index 000000000000..b0b4cb37d1a1
--- /dev/null
+++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=arm
+
+define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) {
+  %1 = sub i32 undef, 48                          ; <i32> [#uses=1]
+  br i1 undef, label %stack_overflow, label %no_overflow
+
+stack_overflow:                                   ; preds = %0
+  unreachable
+
+no_overflow:                                      ; preds = %0
+  %frame = inttoptr i32 %1 to [17 x i32]*         ; <[17 x i32]*> [#uses=4]
+  %2 = load i32* null                             ; <i32> [#uses=2]
+  %3 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
+  %4 = load i32* %3                               ; <i32> [#uses=2]
+  %5 = load [8 x i8]** undef                      ; <[8 x i8]*> [#uses=2]
+  br i1 undef, label %bci_13, label %bci_4
+
+bci_13:                                           ; preds = %no_overflow
+  br i1 undef, label %bci_30, label %bci_21
+
+bci_30:                                           ; preds = %bci_13
+  %6 = icmp sle i32 %2, %4                        ; <i1> [#uses=1]
+  br i1 %6, label %bci_46, label %bci_35
+
+bci_46:                                           ; preds = %bci_30
+  store [84 x i8]* %method, [84 x i8]** undef
+  br i1 false, label %no_exception, label %exception
+
+exception:                                        ; preds = %bci_46
+  ret void
+
+no_exception:                                     ; preds = %bci_46
+  ret void
+
+bci_35:                                           ; preds = %bci_30
+  %7 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1]
+  store i32 %2, i32* %7
+  %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
+  store i32 %4, i32* %8
+  %9 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
+  %10 = bitcast i32* %9 to [8 x i8]**             ; <[8 x i8]**> [#uses=1]
+  store [8 x i8]* %5, [8 x i8]** %10
+  call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7)
+  ret void
+
+bci_21:                                           ; preds = %bci_13
+  ret void
+
+bci_4:                                            ; preds = %no_overflow
+  store [8 x i8]* %5, [8 x i8]** undef
+  store i32 undef, i32* undef
+  call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7)
+  ret void
+}
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll
new file mode 100644
index 000000000000..05a0f5003931
--- /dev/null
+++ b/test/CodeGen/CellSPU/bss.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: grep "\.section" %t1.s | grep "\.bss" | count 1
+
+@bssVar = global i32 zeroinitializer
+
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 4e23f5356cf9..91598cdc961a 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -2,14 +2,14 @@
 
 define i32 @test1(i32 %x) {
 ; CHECK: test1
-; CHECK: uxtb16.w  r0, r0
+; CHECK: uxtb16  r0, r0
 	%tmp1 = and i32 %x, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
 define i32 @test2(i32 %x) {
 ; CHECK: test2
-; CHECK: uxtb16.w  r0, r0, ror #8
+; CHECK: uxtb16  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
@@ -17,7 +17,7 @@ define i32 @test2(i32 %x) {
 
 define i32 @test3(i32 %x) {
 ; CHECK: test3
-; CHECK: uxtb16.w  r0, r0, ror #8
+; CHECK: uxtb16  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
@@ -25,7 +25,7 @@ define i32 @test3(i32 %x) {
 
 define i32 @test4(i32 %x) {
 ; CHECK: test4
-; CHECK: uxtb16.w  r0, r0, ror #8
+; CHECK: uxtb16  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp6 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp6
@@ -33,7 +33,7 @@ define i32 @test4(i32 %x) {
 
 define i32 @test5(i32 %x) {
 ; CHECK: test5
-; CHECK: uxtb16.w  r0, r0, ror #8
+; CHECK: uxtb16  r0, r0, ror #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
@@ -41,7 +41,7 @@ define i32 @test5(i32 %x) {
 
 define i32 @test6(i32 %x) {
 ; CHECK: test6
-; CHECK: uxtb16.w  r0, r0, ror #16
+; CHECK: uxtb16  r0, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -52,7 +52,7 @@ define i32 @test6(i32 %x) {
 
 define i32 @test7(i32 %x) {
 ; CHECK: test7
-; CHECK: uxtb16.w  r0, r0, ror #16
+; CHECK: uxtb16  r0, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -63,7 +63,7 @@ define i32 @test7(i32 %x) {
 
 define i32 @test8(i32 %x) {
 ; CHECK: test8
-; CHECK: uxtb16.w  r0, r0, ror #24
+; CHECK: uxtb16  r0, r0, ror #24
 	%tmp1 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711680		; <i32> [#uses=1]
 	%tmp5 = lshr i32 %x, 24		; <i32> [#uses=1]
@@ -73,7 +73,7 @@ define i32 @test8(i32 %x) {
 
 define i32 @test9(i32 %x) {
 ; CHECK: test9
-; CHECK: uxtb16.w  r0, r0, ror #24
+; CHECK: uxtb16  r0, r0, ror #24
 	%tmp1 = lshr i32 %x, 24		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp5 = and i32 %tmp4, 16711680		; <i32> [#uses=1]
@@ -86,7 +86,7 @@ define i32 @test10(i32 %p0) {
 ; CHECK: mov.w r1, #16253176
 ; CHECK: and.w r0, r1, r0, lsr #7
 ; CHECK: lsrs  r1, r0, #5
-; CHECK: uxtb16.w  r1, r1
+; CHECK: uxtb16  r1, r1
 ; CHECK: orr.w r0, r1, r0
 
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 67e14ffae5e6..4c6493445a90 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 55
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2010-03-04-Mul8Bug.ll b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll
new file mode 100644
index 000000000000..48e75e957248
--- /dev/null
+++ b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+; PR6489
+;
+; This test case produces a MUL8 instruction and then tries to read the result
+; from the AX register instead of AH/AL. That confuses live interval analysis.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @func_56(i64 %p_57, i32*** %p_58) nounwind ssp {
+for.end:
+  %conv49 = trunc i32 undef to i8                 ; <i8> [#uses=1]
+  %div.i = udiv i8 %conv49, 5                     ; <i8> [#uses=1]
+  %conv51 = zext i8 %div.i to i32                 ; <i32> [#uses=1]
+  %call55 = call i32 @qux(i32 undef, i32 -2) nounwind ; <i32> [#uses=1]
+  %rem.i = urem i32 %call55, -1                   ; <i32> [#uses=1]
+  %cmp57 = icmp uge i32 %conv51, %rem.i           ; <i1> [#uses=1]
+  %conv58 = zext i1 %cmp57 to i32                 ; <i32> [#uses=1]
+  %call85 = call i32 @func_35(i32*** undef, i32 undef, i32 %conv58, i32 1247, i32 0) nounwind ; <i32> [#uses=0]
+  ret void
+}
+
+declare i32 @func_35(i32***, i32, i32, i32, i32)
+
+declare i32 @qux(i32, i32)
diff --git a/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
new file mode 100644
index 000000000000..5de19662fffb
--- /dev/null
+++ b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -verify-machineinstrs
+;
+; When BRCOND is constant-folded to BR, make sure that PHI nodes don't get
+; spurious operands when the CFG is trimmed.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+define fastcc void @_ZSt16__introsort_loopIPdl17less_than_functorEvT_S2_T0_T1_(double* %__first, double* %__last, i64 %__depth_limit) nounwind ssp {
+entry:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %entry
+  ret void
+
+bb2:                                              ; preds = %entry
+  br label %bb2.outer.i
+
+bb2.outer.i:                                      ; preds = %bb9.i, %bb2
+  br i1 undef, label %bb1.i, label %bb5.preheader.i
+
+bb1.i:                                            ; preds = %bb1.i, %bb2.outer.i
+  %indvar5.i = phi i64 [ %tmp, %bb1.i ], [ 0, %bb2.outer.i ] ; <i64> [#uses=1]
+  %tmp = add i64 %indvar5.i, 1                    ; <i64> [#uses=2]
+  %scevgep.i = getelementptr double* undef, i64 %tmp ; <double*> [#uses=0]
+  br i1 undef, label %bb1.i, label %bb5.preheader.i
+
+bb5.preheader.i:                                  ; preds = %bb1.i, %bb2.outer.i
+  br label %bb5.i
+
+bb5.i:                                            ; preds = %bb5.i, %bb5.preheader.i
+  br i1 undef, label %bb5.i, label %bb7.i6
+
+bb7.i6:                                           ; preds = %bb5.i
+  br i1 undef, label %bb9.i, label %_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit
+
+bb9.i:                                            ; preds = %bb7.i6
+  br label %bb2.outer.i
+
+_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit: ; preds = %bb7.i6
+  unreachable
+}
diff --git a/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll
new file mode 100644
index 000000000000..3cca10e268cb
--- /dev/null
+++ b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -verify-machineinstrs
+;
+; This test case is transformed into a single basic block by the machine
+; branch folding pass. That makes a complete mess of the %EFLAGS liveness, but
+; we don't care about liveness this late anyway.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) ssp {
+entry:
+  br i1 undef, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  br label %bb2
+
+bb2:                                              ; preds = %bb, %entry
+  br i1 undef, label %bb3, label %bb5
+
+bb3:                                              ; preds = %bb2
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb2
+  br i1 undef, label %bb.nph239, label %bb8
+
+bb.nph239:                                        ; preds = %bb5
+  unreachable
+
+bb8:                                              ; preds = %bb5
+  br i1 undef, label %bb.nph237, label %bb47
+
+bb.nph237:                                        ; preds = %bb8
+  unreachable
+
+bb47:                                             ; preds = %bb8
+  br i1 undef, label %bb49, label %bb48
+
+bb48:                                             ; preds = %bb47
+  unreachable
+
+bb49:                                             ; preds = %bb47
+  br i1 undef, label %bb51, label %bb50
+
+bb50:                                             ; preds = %bb49
+  ret i32 0
+
+bb51:                                             ; preds = %bb49
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 5bf58fa1d505..2b7019371a17 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,17 +1,80 @@
 ; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep APP %t
-; RUN: grep bswapq %t | count 2
-; RUN: grep bswapl %t | count 1
+; RUN: FileCheck %s < %t
 
+; CHECK: foo:
+; CHECK: bswapq
 define i64 @foo(i64 %x) nounwind {
 	%asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
 	ret i64 %asmtmp
 }
+
+; CHECK: bar:
+; CHECK: bswapq
 define i64 @bar(i64 %x) nounwind {
 	%asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
 	ret i64 %asmtmp
 }
+
+; CHECK: pen:
+; CHECK: bswapl
 define i32 @pen(i32 %x) nounwind {
 	%asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
 	ret i32 %asmtmp
 }
+
+; CHECK: s16:
+; CHECK: rolw    $8,
+define zeroext i16 @s16(i16 zeroext %x) nounwind {
+  %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
+  ret i16 %asmtmp
+}
+
+; CHECK: t16:
+; CHECK: rolw    $8,
+define zeroext i16 @t16(i16 zeroext %x) nounwind {
+  %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
+  ret i16 %asmtmp
+}
+
+; CHECK: u16:
+; CHECK: rolw    $8,
+define zeroext i16 @u16(i16 zeroext %x) nounwind {
+  %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
+  ret i16 %asmtmp
+}
+
+; CHECK: v16:
+; CHECK: rolw    $8,
+define zeroext i16 @v16(i16 zeroext %x) nounwind {
+  %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
+  ret i16 %asmtmp
+}
+
+; CHECK: s32:
+; CHECK: bswapl
+define i32 @s32(i32 %x) nounwind {
+  %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
+  ret i32 %asmtmp
+}
+
+; CHECK: t32:
+; CHECK: bswapl
+define i32 @t32(i32 %x) nounwind {
+  %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
+  ret i32 %asmtmp
+}
+
+; CHECK: s64:
+; CHECK: bswapq
+define i64 @s64(i64 %x) nounwind {
+  %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
+  ret i64 %asmtmp
+}
+
+; CHECK: t64:
+; CHECK: bswapq
+define i64 @t64(i64 %x) nounwind {
+  %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind
+  ret i64 %asmtmp
+}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
new file mode 100644
index 000000000000..1e13046f2acd
--- /dev/null
+++ b/test/CodeGen/X86/crash.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 %s -o -
+; RUN: llc -march=x86-64 %s -o -
+
+; PR6497
+
+; Chain and flag folding issues.
+define i32 @test1() nounwind ssp {
+entry:
+  %tmp5.i = volatile load i32* undef              ; <i32> [#uses=1]
+  %conv.i = zext i32 %tmp5.i to i64               ; <i64> [#uses=1]
+  %tmp12.i = volatile load i32* undef             ; <i32> [#uses=1]
+  %conv13.i = zext i32 %tmp12.i to i64            ; <i64> [#uses=1]
+  %shl.i = shl i64 %conv13.i, 32                  ; <i64> [#uses=1]
+  %or.i = or i64 %shl.i, %conv.i                  ; <i64> [#uses=1]
+  %add16.i = add i64 %or.i, 256                   ; <i64> [#uses=1]
+  %shr.i = lshr i64 %add16.i, 8                   ; <i64> [#uses=1]
+  %conv19.i = trunc i64 %shr.i to i32             ; <i32> [#uses=1]
+  volatile store i32 %conv19.i, i32* undef
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 1a7b5777ae8a..d79c56bc4637 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -100,7 +100,7 @@
 
 @G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
 
-; DARWIN:	.section	__TEXT,__ustring
+; DARWIN:	.section	__TEXT,__const
 ; DARWIN:	.globl _G8
 ; DARWIN: _G8:
 
@@ -110,7 +110,6 @@
 
 @G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
 
-; DARWIN:	.section        __TEXT,__const
 ; DARWIN:	.globl _G9
 ; DARWIN: _G9:
 
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index a663a220e62d..d1d714491faa 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -1,19 +1,10 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
-; Also checks andps and andnps shares the same constantpool. Previously llvm
-; will codegen two andps, one using 0x80000000, the other 0x7fffffff.
-; rdar://7323335
-
-; CHECK: movaps LCPI1_0
-; CHECK: movaps LCPI1_1
-; CHECK-NOT: movaps LCPI1_2
-; CHECK: movaps (%rsi,%rax,4), %xmm2
-; CHECK: andps
-; CHECK: andnps
-; CHECK: movaps %xmm2, (%rdi,%rax,4)
+; CHECK: movaps        (%rsi,%rax,4), %xmm3
+; CHECK: movaps        %xmm3, (%rdi,%rax,4)
 ; CHECK: addq  $4, %rax
 ; CHECK: cmpl  %eax, (%rdx)
 ; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index e1d0fe76657d..01d73736d6c2 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -63,6 +63,7 @@ entry:
 ; CHECK: vv:
 ; CHECK: LCPI4_0(%rip), %xmm0
 ; CHECK: LCPI4_1(%rip), %xmm1
+; CHECK: LCPI4_2(%rip), %xmm2
 ; CHECK: align
 ; CHECK-NOT: LCPI
 ; CHECK: ret
diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll
index 80bab619c16f..90315fd2f267 100644
--- a/test/CodeGen/X86/tailcall2.ll
+++ b/test/CodeGen/X86/tailcall2.ll
@@ -195,3 +195,24 @@ bb2:
 }
 
 declare i32 @foo6(i32, i32, %struct.t* byval align 4)
+
+; rdar://7717598
+%struct.ns = type { i32, i32 }
+%struct.cp = type { float, float }
+
+define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
+; 32: t13:
+; 32-NOT: jmp
+; 32: call
+; 32: ret
+
+; 64: t13:
+; 64-NOT: jmp
+; 64: call
+; 64: ret
+entry:
+  %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
+  ret %struct.ns* %0
+}
+
+declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index 2dd2a4adac55..c2f0c23fe1d3 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -5,13 +5,13 @@
 
 ; Use the flags on the add.
 
-; CHECK: add_zf:
+; CHECK: test1:
 ;      CHECK: addl    (%rdi), %esi
 ; CHECK-NEXT: movl    %edx, %eax
 ; CHECK-NEXT: cmovnsl %ecx, %eax
 ; CHECK-NEXT: ret
 
-define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
+define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
 	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
 	%tmp4 = add i32 %tmp2, %y		; <i32> [#uses=1]
 	%tmp5 = icmp slt i32 %tmp4, 0		; <i1> [#uses=1]
@@ -24,10 +24,10 @@ declare void @foo(i32)
 ; Don't use the flags result of the and here, since the and has no
 ; other use. A simple test is better.
 
-; CHECK: bar:
+; CHECK: test2:
 ; CHECK: testb   $16, %dil
 
-define void @bar(i32 %x) nounwind {
+define void @test2(i32 %x) nounwind {
   %y = and i32 %x, 16
   %t = icmp eq i32 %y, 0
   br i1 %t, label %true, label %false
@@ -40,11 +40,11 @@ false:
 
 ; Do use the flags result of the and here, since the and has another use.
 
-; CHECK: qux:
+; CHECK: test3:
 ;      CHECK: andl    $16, %edi
 ; CHECK-NEXT: jne
 
-define void @qux(i32 %x) nounwind {
+define void @test3(i32 %x) nounwind {
   %y = and i32 %x, 16
   %t = icmp eq i32 %y, 0
   br i1 %t, label %true, label %false
diff --git a/test/FrontendC/2010-03-5-LexicalScope.c b/test/FrontendC/2010-03-5-LexicalScope.c
new file mode 100644
index 000000000000..93a841a8f29d
--- /dev/null
+++ b/test/FrontendC/2010-03-5-LexicalScope.c
@@ -0,0 +1,10 @@
+// RUN: %llvmgcc -S -O0 -g %s -o - | grep DW_TAG_lexical_block | count 3
+int foo(int i) {
+	if (i) {
+		int j = 2;
+	}
+	else {
+		int j = 3;
+	}
+	return i;
+}
diff --git a/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll b/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll
deleted file mode 100644
index 80ee3e2a293f..000000000000
--- a/test/Transforms/InstCombine/2006-12-08-ICmp-Combining.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {%bothcond =}
-
-define i1 @Doit_bb(i32 %i.0) {
-bb:
-        %tmp = icmp sgt i32 %i.0, 0             ; <i1> [#uses=1]
-        %tmp.not = xor i1 %tmp, true            ; <i1> [#uses=1]
-        %tmp2 = icmp sgt i32 %i.0, 8            ; <i1> [#uses=1]
-        %bothcond = or i1 %tmp.not, %tmp2               ; <i1> [#uses=1]
-        br i1 %bothcond, label %exitTrue, label %exitFalse
-
-exitTrue:               ; preds = %bb
-        ret i1 true
-
-exitFalse:              ; preds = %bb
-        ret i1 false
-}
-
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
new file mode 100644
index 000000000000..2df12d670adb
--- /dev/null
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -0,0 +1,18 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+; PR6486
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-unknown-linux-gnu"
+
+@g_92 = common global [2 x i32*] zeroinitializer, align 4 ; <[2 x i32*]*> [#uses=1]
+@g_177 = constant i32** bitcast (i8* getelementptr (i8* bitcast ([2 x i32*]* @g_92 to i8*), i64 4) to i32**), align 4 ; <i32***> [#uses=1]
+
+define i1 @test() nounwind {
+; CHECK: @test
+  %tmp = load i32*** @g_177                       ; <i32**> [#uses=1]
+  %cmp = icmp ne i32** null, %tmp                 ; <i1> [#uses=1]
+  %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
+  %cmp1 = icmp sle i32 0, %conv                   ; <i1> [#uses=1]
+  ret i1 %cmp1
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/JavaCompare.ll b/test/Transforms/InstCombine/JavaCompare.ll
index 7d0edb84d1eb..46b6c19f9a5b 100644
--- a/test/Transforms/InstCombine/JavaCompare.ll
+++ b/test/Transforms/InstCombine/JavaCompare.ll
@@ -1,7 +1,7 @@
 ; This is the sequence of stuff that the Java front-end expands for a single 
 ; <= comparison.  Check to make sure we turn it into a <= (only)
 
-; RUN: opt < %s -instcombine -S | grep {%c3 = icmp sle i32 %A, %B}
+; RUN: opt < %s -instcombine -S | grep {icmp sle i32 %A, %B}
 
 define i1 @le(i32 %A, i32 %B) {
         %c1 = icmp sgt i32 %A, %B               ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index 2faa5392d4ba..854bfc81de2e 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -237,3 +237,18 @@ entry:
   %or = or i32 %and42, %and47
   ret i32 %or
 }
+
+; PR6503
+define void @test12(i32* %A) nounwind {
+entry:
+  %tmp1 = load i32* %A
+  %cmp = icmp ugt i32 1, %tmp1                    ; <i1> [#uses=1]
+  %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
+  %tmp2 = load i32* %A
+  %cmp3 = icmp ne i32 %tmp2, 0                    ; <i1> [#uses=1]
+  %conv4 = zext i1 %cmp3 to i32                   ; <i32> [#uses=1]
+  %or = or i32 %conv, %conv4                      ; <i32> [#uses=1]
+  %cmp5 = icmp ugt i32 undef, %or                 ; <i1> [#uses=1]
+  %conv6 = zext i1 %cmp5 to i32                   ; <i32> [#uses=0]
+  ret void
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index c2234a10e5b7..29997bf8c41e 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -48,7 +48,7 @@ entry:
   %V = icmp eq <2 x i64> %x, undef
   ret <2 x i1> %V
 ; CHECK: @test5
-; CHECK: ret <2 x i1> undef
+; CHECK: ret <2 x i1> <i1 true, i1 true>
 }
 
 define i32 @test6(i32 %a, i32 %b) {
@@ -121,3 +121,13 @@ define i1 @test12(i1 %A) {
 ; CHECK-NEXT: %B = select i1
 ; CHECK-NEXT: ret i1 %B
 }
+
+; PR6481
+define i1 @test13(i8 %X) nounwind readnone {
+entry:
+        %cmp = icmp slt i8 undef, %X
+        ret i1 %cmp
+; CHECK: @test13
+; CHECK: ret i1 false
+}
+
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index fe5df928439b..5cafb7787e36 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -89,8 +89,8 @@ define i1 @test8(i32 %X) {
   ret i1 %S
 ; CHECK: @test8
 ; CHECK-NEXT: add i32 %X, -8
-; CHECK-NEXT: %S = icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %S
+; CHECK-NEXT: icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: ret i1
 }
 
 @GA = internal constant [4 x { i32, i32 } ] [
@@ -107,6 +107,6 @@ define i1 @test9(i32 %X) {
   ret i1 %R
 ; CHECK: @test9
 ; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %R
+; CHECK-NEXT: icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: ret i1
 }
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 9df122499e62..bf1a37f975d9 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -102,4 +102,24 @@ bb12:
   unreachable
 }
 
+; rdar://7718857
+
+%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+define i32 @test4() nounwind ssp {
+; CHECK: @test4
+entry:
+  %0 = alloca %struct.data, align 8
+  %1 = bitcast %struct.data* %0 to i8*
+  %2 = call i64 @llvm.objectsize.i64(i8* %1, i1 false) nounwind
+; CHECK-NOT: @llvm.objectsize
+; CHECK: @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824)
+  %3 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 %2) nounwind
+  ret i32 0
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
+
 declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 189be1050fbd..c3526b77f6a5 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -126,8 +126,8 @@ define i1 @test14(i32 %A, i32 %B) {
         %D = or i1 %C1, %C2
         ret i1 %D
 ; CHECK: @test14
-; CHECK: %D = icmp ne i32 %A, %B
-; CHECK: ret i1 %D
+; CHECK: icmp ne i32 %A, %B
+; CHECK: ret i1
 }
 
 define i1 @test15(i32 %A, i32 %B) {
@@ -137,8 +137,8 @@ define i1 @test15(i32 %A, i32 %B) {
         %D = or i1 %C1, %C2
         ret i1 %D
 ; CHECK: @test15
-; CHECK: %D = icmp ule i32 %A, %B
-; CHECK: ret i1 %D
+; CHECK:  icmp ule i32 %A, %B
+; CHECK: ret i1
 }
 
 define i32 @test16(i32 %A) {
@@ -171,8 +171,8 @@ define i1 @test18(i32 %A) {
         ret i1 %D
 ; CHECK: @test18
 ; CHECK: add i32
-; CHECK: %D = icmp ugt 
-; CHECK: ret i1 %D
+; CHECK:  icmp ugt 
+; CHECK: ret i1 
 }
 
 define i1 @test19(i32 %A) {
@@ -183,8 +183,8 @@ define i1 @test19(i32 %A) {
         ret i1 %D
 ; CHECK: @test19
 ; CHECK: add i32
-; CHECK: %D = icmp ult 
-; CHECK: ret i1 %D
+; CHECK: icmp ult 
+; CHECK: ret i1
 }
 
 define i32 @test20(i32 %x) {
@@ -236,8 +236,8 @@ define i1 @test24(double %X, double %Y) {
         ret i1 %bothcond
         
 ; CHECK: @test24
-; CHECK:   %bothcond = fcmp uno double %Y, %X              ; <i1> [#uses=1]
-; CHECK:   ret i1 %bothcond
+; CHECK:    = fcmp uno double %Y, %X
+; CHECK:   ret i1 
 }
 
 ; PR3266 & PR5276
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index f0343e44c2bb..fc321e968224 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -362,3 +362,43 @@ end:
 ; CHECK-NEXT: ret i64
 }
 
+; PR6512 - Shouldn't merge loads from different addr spaces.
+define i32 @test16(i32 addrspace(1)* %pointer1, i32 %flag, i32* %pointer2)
+nounwind {
+entry:
+  %retval = alloca i32, align 4                   ; <i32*> [#uses=2]
+  %pointer1.addr = alloca i32 addrspace(1)*, align 4 ; <i32 addrspace(1)**>
+  %flag.addr = alloca i32, align 4                ; <i32*> [#uses=2]
+  %pointer2.addr = alloca i32*, align 4           ; <i32**> [#uses=2]
+  %res = alloca i32, align 4                      ; <i32*> [#uses=4]
+  store i32 addrspace(1)* %pointer1, i32 addrspace(1)** %pointer1.addr
+  store i32 %flag, i32* %flag.addr
+  store i32* %pointer2, i32** %pointer2.addr
+  store i32 10, i32* %res
+  %tmp = load i32* %flag.addr                     ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %tmp, 0                   ; <i1> [#uses=1]
+  br i1 %tobool, label %if.then, label %if.else
+
+return:                                           ; preds = %if.end
+  %tmp7 = load i32* %retval                       ; <i32> [#uses=1]
+  ret i32 %tmp7
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmp6 = load i32* %res                          ; <i32> [#uses=1]
+  store i32 %tmp6, i32* %retval
+  br label %return
+
+if.then:                                          ; preds = %entry
+  %tmp1 = load i32 addrspace(1)** %pointer1.addr  ; <i32 addrspace(1)*>
+  %arrayidx = getelementptr i32 addrspace(1)* %tmp1, i32 0 ; <i32 addrspace(1)*> [#uses=1]
+  %tmp2 = load i32 addrspace(1)* %arrayidx        ; <i32> [#uses=1]
+  store i32 %tmp2, i32* %res
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %tmp3 = load i32** %pointer2.addr               ; <i32*> [#uses=1]
+  %arrayidx4 = getelementptr i32* %tmp3, i32 0    ; <i32*> [#uses=1]
+  %tmp5 = load i32* %arrayidx4                    ; <i32> [#uses=1]
+  store i32 %tmp5, i32* %res
+  br label %if.end
+}
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index 060018d8da4c..6f21b66ed005 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -23,11 +23,22 @@ entry:
   %3 = add nsw i32 undef, %1
   %4 = add nsw i32 %3, %2
   %5 = add nsw i32 %4, 4
-  %6 = shl i32 %0, 3                              ; <i32> [#uses=1]
+  %6 = shl i32 %0, 3
   %7 = add nsw i32 %5, %6
   br label %bb4.i9
 
-bb4.i9:                                           ; preds = %bb3.i7, %bb1.i25.i
+bb4.i9:
   %8 = add nsw i32 undef, %1
   ret i32 0
 }
+
+
+define i32 @test3(i32 %Arg, i32 %x1, i32 %x2, i32 %x3) {
+ %A = mul i32 %x1, %Arg
+ %B = mul i32 %Arg, %x2 ;; Part of add operation being factored, also used by C
+ %C = mul i32 %x3, %B
+
+ %D = add i32 %A, %B
+ %E = add i32 %D, %C
+  ret i32 %E
+}
diff --git a/test/Transforms/SimplifyLibCalls/memset_chk.ll b/test/Transforms/SimplifyLibCalls/memset_chk.ll
new file mode 100644
index 000000000000..c4ef60ec3856
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/memset_chk.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; rdar://7719085
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+define i32 @t() nounwind ssp {
+; CHECK: @t
+; CHECK: @llvm.memset.i64
+entry:
+  %0 = alloca %struct.data, align 8               ; <%struct.data*> [#uses=1]
+  %1 = bitcast %struct.data* %0 to i8*            ; <i8*> [#uses=1]
+  %2 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824) nounwind ; <i8*> [#uses=0]
+  ret i32 0
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
diff --git a/test/lit.cfg b/test/lit.cfg
index 929871a1d225..b4aec5a50ab1 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -144,6 +144,9 @@ bindings = set(site_exp['llvm_bindings'].split(','))
 def llvm_supports_binding(name):
     return name in bindings
 
+config.conditions["TARGET"] = llvm_supports_target
+config.conditions["BINDING"] = llvm_supports_binding
+
 # Provide on_clone hook for reading 'dg.exp'.
 import os
 simpleLibData = re.compile(r"""load_lib llvm.exp
diff --git a/tools/Makefile b/tools/Makefile
index b6637a95f8e3..86ba72ddacc6 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -21,8 +21,8 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
                  llvm-ld llvm-prof llvm-link \
                  lli llvm-extract \
                  bugpoint llvm-bcanalyzer llvm-stub \
-                 llvm-mc llvmc \
-                 edis
+                 llvm-mc llvmc
+                 
 
 # Let users override the set of tools to build from the command line.
 ifdef ONLY_TOOLS
@@ -32,18 +32,24 @@ endif
 
 include $(LEVEL)/Makefile.config
 
+# These libraries build as dynamic libraries (.dylib / .so); they can only be
+# built if ENABLE_PIC is set.
 ifeq ($(ENABLE_PIC),1)
-  DIRS += lto
-  ifdef BINUTILS_INCDIR
-    DIRS += gold
+  # No support for dynamic libraries on windows targets.
+  ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
+    PARALLEL_DIRS += edis
+    
+    # gold only builds if binutils is around.  It requires "lto" to build before
+    # it so it is added to DIRS.
+    ifdef BINUTILS_INCDIR
+      PARALLEL_DIRS += gold
+      DIRS += lto
+    else
+      PARALLEL_DIRS += lto
+    endif
   endif
 endif
 
-# No support for lto / gold on windows targets
-ifeq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
-  DIRS := $(filter-out lto gold, $(DIRS))
-endif
-
 # Only build edis if X86 target support is enabled.
 ifeq ($(filter $(TARGETS_TO_BUILD), X86),)
   PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS))
diff --git a/tools/llvm-config/Makefile b/tools/llvm-config/Makefile
index cc5cf43606b3..c7f7b3234d64 100644
--- a/tools/llvm-config/Makefile
+++ b/tools/llvm-config/Makefile
@@ -126,6 +126,6 @@ clean-local::
 	  $(LibDeps) GenLibDeps.out
 install-local:: all-local
 	$(Echo) Installing llvm-config
-	$(Verb) $(MKDIR) $(PROJ_bindir)
-	$(Verb) $(ScriptInstall) $(ToolDir)/llvm-config $(PROJ_bindir)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_bindir)
+	$(Verb) $(ScriptInstall) $(ToolDir)/llvm-config $(DESTDIR)$(PROJ_bindir)
 
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index dfe38980375e..ca8500d615d7 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -329,16 +329,22 @@ separate option groups syntactically.
 
    - ``required`` - this option must be specified exactly once (or, in case of
      the list options without the ``multi_val`` property, at least
-     once). Incompatible with ``zero_or_one`` and ``one_or_more``.
-
-   - ``one_or_more`` - the option must be specified at least one time. Useful
-     only for list options in conjunction with ``multi_val``; for ordinary lists
-     it is synonymous with ``required``. Incompatible with ``required`` and
-     ``zero_or_one``.
-
-   - ``optional`` - the option can be specified zero or one times. Useful only
-     for list options in conjunction with ``multi_val``. Incompatible with
-     ``required`` and ``one_or_more``.
+     once). Incompatible with ``optional`` and ``one_or_more``.
+
+   - ``optional`` - the option can be specified either zero times or exactly
+     once. The default for switch options. Useful only for list options in
+     conjunction with ``multi_val``. Incompatible with ``required``,
+     ``zero_or_more`` and ``one_or_more``.
+
+   - ``one_or_more`` - the option must be specified at least once. Can be useful
+     to allow switch options to be both obligatory and specified multiple
+     times. For list options it is useful only in conjunction with
+     ``multi_val``; for ordinary lists it is synonymous with ``required``.
+     Incompatible with ``required``, ``optional`` and ``zero_or_more``.
+
+   - ``zero_or_more`` - the option can be specified zero or more times. Useful
+     to allow a single switch option to be specified more than
+     once. Incompatible with ``required``, ``optional`` and ``one_or_more``.
 
    - ``hidden`` - the description of this option will not appear in
      the ``--help`` output (but will appear in the ``--help-hidden``
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in
index 7b82313dc06e..ac0f665925c8 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/plugins/Base/Base.td.in
@@ -25,13 +25,13 @@ def OptList : OptionList<[
  (switch_option "opt",
     (help "Enable opt")),
  (switch_option "O0",
-    (help "Turn off optimization")),
+    (help "Turn off optimization"), (zero_or_more)),
  (switch_option "O1",
-    (help "Optimization level 1")),
+    (help "Optimization level 1"), (zero_or_more)),
  (switch_option "O2",
-    (help "Optimization level 2")),
+    (help "Optimization level 2"), (zero_or_more)),
  (switch_option "O3",
-    (help "Optimization level 3")),
+    (help "Optimization level 3"), (zero_or_more)),
  (switch_option "S",
     (help "Stop after compilation, do not assemble")),
  (switch_option "c",
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 84ee0e3075d6..b85f724353c3 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -65,6 +65,8 @@ public:
     stubsAllocated = 0;
   }
 
+  void setSizeRequired(bool Required) { SizeRequired = Required; }
+
   virtual void setMemoryWritable() { Base->setMemoryWritable(); }
   virtual void setMemoryExecutable() { Base->setMemoryExecutable(); }
   virtual void setPoisonMemory(bool poison) { Base->setPoisonMemory(poison); }
@@ -628,6 +630,54 @@ TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) {
                         << " not 7 from the IR version.";
 }
 
+TEST_F(JITTest, NeedsExactSizeWithManyGlobals) {
+  // PR5291: When the JMM needed the exact size of function bodies before
+  // starting to emit them, the JITEmitter would modify a set while iterating
+  // over it.
+  TheJIT->DisableLazyCompilation(true);
+  RJMM->setSizeRequired(true);
+
+  LoadAssembly("@A = global i32 42 "
+               "@B = global i32* @A "
+               "@C = global i32** @B "
+               "@D = global i32*** @C "
+               "@E = global i32**** @D "
+               "@F = global i32***** @E "
+               "@G = global i32****** @F "
+               "@H = global i32******* @G "
+               "@I = global i32******** @H "
+               "define i32********* @test() { "
+               "  ret i32********* @I "
+               "}");
+  Function *testIR = M->getFunction("test");
+  int32_t********* (*test)() = reinterpret_cast<int32_t*********(*)()>(
+    (intptr_t)TheJIT->getPointerToFunction(testIR));
+  EXPECT_EQ(42, *********test());
+}
+
+TEST_F(JITTest, EscapedLazyStubStillCallable) {
+  TheJIT->DisableLazyCompilation(false);
+  LoadAssembly("define internal i32 @stubbed() { "
+               "  ret i32 42 "
+               "} "
+               " "
+               "define i32()* @get_stub() { "
+               "  ret i32()* @stubbed "
+               "} ");
+  typedef int32_t(*StubTy)();
+
+  // Call get_stub() to get the address of @stubbed without actually JITting it.
+  Function *get_stubIR = M->getFunction("get_stub");
+  StubTy (*get_stub)() = reinterpret_cast<StubTy(*)()>(
+    (intptr_t)TheJIT->getPointerToFunction(get_stubIR));
+  StubTy stubbed = get_stub();
+  // Now get_stubIR is the only reference to stubbed's stub.
+  get_stubIR->eraseFromParent();
+  // Now there are no references inside the JIT, but we've got a pointer outside
+  // it.  The stub should be callable and return the right value.
+  EXPECT_EQ(42, stubbed());
+}
+
 // Converts the LLVM assembly to bitcode and returns it in a std::string.  An
 // empty string indicates an error.
 std::string AssembleToBitcode(LLVMContext &Context, const char *Assembly) {
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index e3747892fdf7..13bf27e5d8fa 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -20,11 +20,15 @@ using namespace llvm;
 
 namespace {
 
-LLVMContext &Context = getGlobalContext();
+class MetadataTest : public testing::Test {
+protected:
+  LLVMContext Context;
+};
+typedef MetadataTest MDStringTest;
 
 // Test that construction of MDString with different value produces different
 // MDString objects, even with the same string pointer and nulls in the string.
-TEST(MDStringTest, CreateDifferent) {
+TEST_F(MDStringTest, CreateDifferent) {
   char x[3] = { 'f', 0, 'A' };
   MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
   x[2] = 'B';
@@ -34,7 +38,7 @@ TEST(MDStringTest, CreateDifferent) {
 
 // Test that creation of MDStrings with the same string contents produces the
 // same MDString object, even with different pointers.
-TEST(MDStringTest, CreateSame) {
+TEST_F(MDStringTest, CreateSame) {
   char x[4] = { 'a', 'b', 'c', 'X' };
   char y[4] = { 'a', 'b', 'c', 'Y' };
 
@@ -44,7 +48,7 @@ TEST(MDStringTest, CreateSame) {
 }
 
 // Test that MDString prints out the string we fed it.
-TEST(MDStringTest, PrintingSimple) {
+TEST_F(MDStringTest, PrintingSimple) {
   char *str = new char[13];
   strncpy(str, "testing 1 2 3", 13);
   MDString *s = MDString::get(Context, StringRef(str, 13));
@@ -58,7 +62,7 @@ TEST(MDStringTest, PrintingSimple) {
 }
 
 // Test printing of MDString with non-printable characters.
-TEST(MDStringTest, PrintingComplex) {
+TEST_F(MDStringTest, PrintingComplex) {
   char str[5] = {0, '\n', '"', '\\', -1};
   MDString *s = MDString::get(Context, StringRef(str+0, 5));
   std::string Str;
@@ -67,8 +71,10 @@ TEST(MDStringTest, PrintingComplex) {
   EXPECT_STREQ("metadata !\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str());
 }
 
+typedef MetadataTest MDNodeTest;
+
 // Test the two constructors, and containing other Constants.
-TEST(MDNodeTest, Simple) {
+TEST_F(MDNodeTest, Simple) {
   char x[3] = { 'a', 'b', 'c' };
   char y[3] = { '1', '2', '3' };
 
@@ -101,7 +107,7 @@ TEST(MDNodeTest, Simple) {
   EXPECT_EQ(n1, n2->getOperand(0));
 }
 
-TEST(MDNodeTest, Delete) {
+TEST_F(MDNodeTest, Delete) {
   Constant *C = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1);
   Instruction *I = new BitCastInst(C, Type::getInt32Ty(getGlobalContext()));
 
@@ -115,8 +121,9 @@ TEST(MDNodeTest, Delete) {
 }
 
 TEST(NamedMDNodeTest, Search) {
-  Constant *C = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 1);
-  Constant *C2 = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 2);
+  LLVMContext Context;
+  Constant *C = ConstantInt::get(Type::getInt32Ty(Context), 1);
+  Constant *C2 = ConstantInt::get(Type::getInt32Ty(Context), 2);
 
   Value *const V = C;
   Value *const V2 = C2;
@@ -125,9 +132,9 @@ TEST(NamedMDNodeTest, Search) {
 
   MDNode *Nodes[2] = { n, n2 };
 
-  Module *M = new Module("MyModule", getGlobalContext());
+  Module *M = new Module("MyModule", Context);
   const char *Name = "llvm.NMD1";
-  NamedMDNode *NMD = NamedMDNode::Create(getGlobalContext(), Name, &Nodes[0], 2, M);
+  NamedMDNode *NMD = NamedMDNode::Create(Context, Name, &Nodes[0], 2, M);
   std::string Str;
   raw_string_ostream oss(Str);
   NMD->print(oss);
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index c2e81711e085..7955c7e2630d 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -609,11 +609,28 @@ private:
 /// the current node.
 class CheckComplexPatMatcher : public Matcher {
   const ComplexPattern &Pattern;
+  
+  /// MatchNumber - This is the recorded nodes slot that contains the node we want to
+  /// match against.
+  unsigned MatchNumber;
+  
+  /// Name - The name of the node we're matching, for comment emission.
+  std::string Name;
+  
+  /// FirstResult - This is the first slot in the RecordedNodes list that the
+  /// result of the match populates.
+  unsigned FirstResult;
 public:
-  CheckComplexPatMatcher(const ComplexPattern &pattern)
-    : Matcher(CheckComplexPat), Pattern(pattern) {}
+  CheckComplexPatMatcher(const ComplexPattern &pattern, unsigned matchnumber,
+                         const std::string &name, unsigned firstresult)
+    : Matcher(CheckComplexPat), Pattern(pattern), MatchNumber(matchnumber),
+      Name(name), FirstResult(firstresult) {}
   
   const ComplexPattern &getPattern() const { return Pattern; }
+  unsigned getMatchNumber() const { return MatchNumber; }
+  
+  const std::string &getName() const { return Name; }
+  unsigned getFirstResult() const { return FirstResult; }
   
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckComplexPat;
@@ -625,10 +642,11 @@ public:
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
-    return &cast<CheckComplexPatMatcher>(M)->Pattern == &Pattern;
+    return &cast<CheckComplexPatMatcher>(M)->Pattern == &Pattern &&
+           cast<CheckComplexPatMatcher>(M)->MatchNumber == MatchNumber;
   }
   virtual unsigned getHashImpl() const {
-    return (unsigned)(intptr_t)&Pattern;
+    return (unsigned)(intptr_t)&Pattern ^ MatchNumber;
   }
 };
   
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 1f0405038c6b..cabf2d438254 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -42,8 +42,6 @@ class MatcherTableEmitter {
   DenseMap<Record*, unsigned> NodeXFormMap;
   std::vector<Record*> NodeXForms;
 
-  // Per opcode frequence count. 
-  std::vector<unsigned> Histogram;
 public:
   MatcherTableEmitter() {}
 
@@ -53,7 +51,7 @@ public:
   void EmitPredicateFunctions(const CodeGenDAGPatterns &CGP,
                               formatted_raw_ostream &OS);
   
-  void EmitHistogram(formatted_raw_ostream &OS);
+  void EmitHistogram(const Matcher *N, formatted_raw_ostream &OS);
 private:
   unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
                        formatted_raw_ostream &OS);
@@ -370,17 +368,22 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     return 2;
 
   case Matcher::CheckComplexPat: {
-    const ComplexPattern &Pattern =
-      cast<CheckComplexPatMatcher>(N)->getPattern();
-    OS << "OPC_CheckComplexPat, " << getComplexPat(Pattern) << ',';
+    const CheckComplexPatMatcher *CCPM = cast<CheckComplexPatMatcher>(N);
+    const ComplexPattern &Pattern = CCPM->getPattern();
+    OS << "OPC_CheckComplexPat, /*CP*/" << getComplexPat(Pattern) << ", /*#*/"
+       << CCPM->getMatchNumber() << ',';
+    
     if (!OmitComments) {
       OS.PadToColumn(CommentIndent) << "// " << Pattern.getSelectFunc();
-      OS << ": " << Pattern.getNumOperands() << " operands";
+      OS << ":$" << CCPM->getName();
+      for (unsigned i = 0, e = Pattern.getNumOperands(); i != e; ++i)
+        OS << " #" << CCPM->getFirstResult()+i;
+           
       if (Pattern.hasProperty(SDNPHasChain))
-        OS << " + chain result and input";
+        OS << " + chain result";
     }
     OS << '\n';
-    return 2;
+    return 3;
   }
       
   case Matcher::CheckAndImm: {
@@ -549,9 +552,6 @@ EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
                 formatted_raw_ostream &OS) {
   unsigned Size = 0;
   while (N) {
-    if (unsigned(N->getKind()) >= Histogram.size())
-      Histogram.resize(N->getKind()+1);
-    Histogram[N->getKind()]++;
     if (!OmitComments)
       OS << "/*" << CurrentIdx << "*/";
     unsigned MatcherSize = EmitMatcher(N, Indent, CurrentIdx, OS);
@@ -676,11 +676,38 @@ void MatcherTableEmitter::EmitPredicateFunctions(const CodeGenDAGPatterns &CGP,
   }
 }
 
-void MatcherTableEmitter::EmitHistogram(formatted_raw_ostream &OS) {
+static void BuildHistogram(const Matcher *M, std::vector<unsigned> &OpcodeFreq){
+  for (; M != 0; M = M->getNext()) {
+    // Count this node.
+    if (unsigned(M->getKind()) >= OpcodeFreq.size())
+      OpcodeFreq.resize(M->getKind()+1);
+    OpcodeFreq[M->getKind()]++;
+  
+    // Handle recursive nodes.
+    if (const ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M)) {
+      for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i)
+        BuildHistogram(SM->getChild(i), OpcodeFreq);
+    } else if (const SwitchOpcodeMatcher *SOM = 
+                 dyn_cast<SwitchOpcodeMatcher>(M)) {
+      for (unsigned i = 0, e = SOM->getNumCases(); i != e; ++i)
+        BuildHistogram(SOM->getCaseMatcher(i), OpcodeFreq);
+    } else if (const SwitchTypeMatcher *STM = dyn_cast<SwitchTypeMatcher>(M)) {
+      for (unsigned i = 0, e = STM->getNumCases(); i != e; ++i)
+        BuildHistogram(STM->getCaseMatcher(i), OpcodeFreq);
+    }
+  }
+}
+
+void MatcherTableEmitter::EmitHistogram(const Matcher *M,
+                                        formatted_raw_ostream &OS) {
   if (OmitComments)
     return;
+  
+  std::vector<unsigned> OpcodeFreq;
+  BuildHistogram(M, OpcodeFreq);
+  
   OS << "  // Opcode Histogram:\n";
-  for (unsigned i = 0, e = Histogram.size(); i != e; ++i) {
+  for (unsigned i = 0, e = OpcodeFreq.size(); i != e; ++i) {
     OS << "  // #";
     switch ((Matcher::KindTy)i) {
     case Matcher::Scope: OS << "OPC_Scope"; break; 
@@ -720,7 +747,7 @@ void MatcherTableEmitter::EmitHistogram(formatted_raw_ostream &OS) {
     case Matcher::CompleteMatch: OS << "OPC_CompleteMatch"; break;    
     }
     
-    OS.PadToColumn(40) << " = " << Histogram[i] << '\n';
+    OS.PadToColumn(40) << " = " << OpcodeFreq[i] << '\n';
   }
   OS << '\n';
 }
@@ -741,7 +768,7 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher,
   unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 5, 0, OS);
   OS << "    0\n  }; // Total Array size is " << (TotalSize+1) << " bytes\n\n";
   
-  MatcherEmitter.EmitHistogram(OS);
+  MatcherEmitter.EmitHistogram(TheMatcher, OS);
   
   OS << "  #undef TARGET_OPCODE\n";
   OS << "  return SelectCodeCommon(N, MatcherTable,sizeof(MatcherTable));\n}\n";
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 448280345bc6..5488853e8367 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -72,6 +72,14 @@ namespace {
     /// nodes array of all of the recorded input nodes that have flag results.
     SmallVector<unsigned, 2> MatchedFlagResultNodes;
     
+    /// MatchedComplexPatterns - This maintains a list of all of the
+    /// ComplexPatterns that we need to check.  The patterns are known to have
+    /// names which were recorded.  The second element of each pair is the first
+    /// slot number that the OPC_CheckComplexPat opcode drops the matched
+    /// results into.
+    SmallVector<std::pair<const TreePatternNode*,
+                          unsigned>, 2> MatchedComplexPatterns;
+    
     /// PhysRegInputs - List list has an entry for each explicitly specified
     /// physreg input to the pattern.  The first elt is the Register node, the
     /// second is the recorded slot number the input pattern match saved it in.
@@ -247,30 +255,9 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
       exit(1);
     }
 
-    // Handle complex pattern.
-    const ComplexPattern &CP = CGP.getComplexPattern(LeafRec);
-    
-    // Emit a CheckComplexPat operation, which does the match (aborting if it
-    // fails) and pushes the matched operands onto the recorded nodes list.
-    AddMatcher(new CheckComplexPatMatcher(CP));
-    
-    // Record the right number of operands.
-    NextRecordedOperandNo += CP.getNumOperands();
-    if (CP.hasProperty(SDNPHasChain))
-      ++NextRecordedOperandNo; // Chained node operand.
-    
-    // If the complex pattern has a chain, then we need to keep track of the
-    // fact that we just recorded a chain input.  The chain input will be
-    // matched as the last operand of the predicate if it was successful.
-    if (CP.hasProperty(SDNPHasChain)) {
-      // It is the last operand recorded.
-      assert(NextRecordedOperandNo > 1 &&
-             "Should have recorded input/result chains at least!");
-      MatchedChainNodes.push_back(NextRecordedOperandNo-1);
-    }
-    
-    // TODO: Complex patterns can't have output flags, if they did, we'd want
-    // to record them.
+    // Remember this ComplexPattern so that we can emit it after all the other
+    // structural matches are done.
+    MatchedComplexPatterns.push_back(std::make_pair(N, 0));
     return;
   }
   
@@ -484,17 +471,56 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
     if (Variant != 0) return true;
   }
     
+  // Emit the matcher for the pattern structure and types.
+  EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes);
+  
   // If the pattern has a predicate on it (e.g. only enabled when a subtarget
   // feature is around, do the check).
-  // FIXME: This should get emitted after the match code below to encourage
-  // sharing.  This can't happen until we get an X86ISD::AddrMode node made by
-  // dag combine, eliminating the horrible side-effect-full stuff from 
-  // X86's MatchAddress.
   if (!Pattern.getPredicateCheck().empty())
     AddMatcher(new CheckPatternPredicateMatcher(Pattern.getPredicateCheck()));
-
-  // Emit the matcher for the pattern structure and types.
-  EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes);
+  
+  // Now that we've completed the structural type match, emit any ComplexPattern
+  // checks (e.g. addrmode matches).  We emit this after the structural match
+  // because they are generally more expensive to evaluate and more difficult to
+  // factor.
+  for (unsigned i = 0, e = MatchedComplexPatterns.size(); i != e; ++i) {
+    const TreePatternNode *N = MatchedComplexPatterns[i].first;
+    
+    // Remember where the results of this match get stuck.
+    MatchedComplexPatterns[i].second = NextRecordedOperandNo;
+
+    // Get the slot we recorded the value in from the name on the node.
+    unsigned RecNodeEntry = VariableMap[N->getName()];
+    assert(!N->getName().empty() && RecNodeEntry &&
+           "Complex pattern should have a name and slot");
+    --RecNodeEntry;  // Entries in VariableMap are biased.
+    
+    const ComplexPattern &CP =
+      CGP.getComplexPattern(((DefInit*)N->getLeafValue())->getDef());
+    
+    // Emit a CheckComplexPat operation, which does the match (aborting if it
+    // fails) and pushes the matched operands onto the recorded nodes list.
+    AddMatcher(new CheckComplexPatMatcher(CP, RecNodeEntry,
+                                          N->getName(), NextRecordedOperandNo));
+    
+    // Record the right number of operands.
+    NextRecordedOperandNo += CP.getNumOperands();
+    if (CP.hasProperty(SDNPHasChain)) {
+      // If the complex pattern has a chain, then we need to keep track of the
+      // fact that we just recorded a chain input.  The chain input will be
+      // matched as the last operand of the predicate if it was successful.
+      ++NextRecordedOperandNo; // Chained node operand.
+    
+      // It is the last operand recorded.
+      assert(NextRecordedOperandNo > 1 &&
+             "Should have recorded input/result chains at least!");
+      MatchedChainNodes.push_back(NextRecordedOperandNo-1);
+    }
+    
+    // TODO: Complex patterns can't have output flags, if they did, we'd want
+    // to record them.
+  }
+  
   return false;
 }
 
@@ -507,18 +533,26 @@ void MatcherGen::EmitResultOfNamedOperand(const TreePatternNode *N,
                                           SmallVectorImpl<unsigned> &ResultOps){
   assert(!N->getName().empty() && "Operand not named!");
   
-  unsigned SlotNo = getNamedArgumentSlot(N->getName());
-  
   // A reference to a complex pattern gets all of the results of the complex
   // pattern's match.
   if (const ComplexPattern *CP = N->getComplexPatternInfo(CGP)) {
+    unsigned SlotNo = 0;
+    for (unsigned i = 0, e = MatchedComplexPatterns.size(); i != e; ++i)
+      if (MatchedComplexPatterns[i].first->getName() == N->getName()) {
+        SlotNo = MatchedComplexPatterns[i].second;
+        break;
+      }
+    assert(SlotNo != 0 && "Didn't get a slot number assigned?");
+    
     // The first slot entry is the node itself, the subsequent entries are the
     // matched values.
     for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
-      ResultOps.push_back(SlotNo+i+1);
+      ResultOps.push_back(SlotNo+i);
     return;
   }
 
+  unsigned SlotNo = getNamedArgumentSlot(N->getName());
+
   // If this is an 'imm' or 'fpimm' node, make sure to convert it to the target
   // version of the immediate so that it doesn't get selected due to some other
   // node use.
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index db590534e899..da2d54f5439b 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -230,7 +230,8 @@ namespace OptionDescriptionFlags {
   enum OptionDescriptionFlags { Required = 0x1, Hidden = 0x2,
                                 ReallyHidden = 0x4, Extern = 0x8,
                                 OneOrMore = 0x10, Optional = 0x20,
-                                CommaSeparated = 0x40, ForwardNotSplit = 0x80 };
+                                CommaSeparated = 0x40, ForwardNotSplit = 0x80,
+                                ZeroOrMore = 0x100 };
 }
 
 /// OptionDescription - Represents data contained in a single
@@ -260,6 +261,9 @@ struct OptionDescription {
   /// Merge - Merge two option descriptions.
   void Merge (const OptionDescription& other);
 
+  /// CheckConsistency - Check that the flags are consistent.
+  void CheckConsistency() const;
+
   // Misc convenient getters/setters.
 
   bool isAlias() const;
@@ -281,6 +285,9 @@ struct OptionDescription {
   bool isOneOrMore() const;
   void setOneOrMore();
 
+  bool isZeroOrMore() const;
+  void setZeroOrMore();
+
   bool isOptional() const;
   void setOptional();
 
@@ -301,6 +308,20 @@ struct OptionDescription {
 
 };
 
+void OptionDescription::CheckConsistency() const {
+  unsigned i = 0;
+
+  i += this->isRequired();
+  i += this->isOptional();
+  i += this->isOneOrMore();
+  i += this->isZeroOrMore();
+
+  if (i > 1) {
+    throw "Only one of (required), (optional), (one_or_more) or "
+      "(zero_or_more) properties is allowed!";
+  }
+}
+
 void OptionDescription::Merge (const OptionDescription& other)
 {
   if (other.Type != Type)
@@ -359,6 +380,13 @@ void OptionDescription::setOneOrMore() {
   Flags |= OptionDescriptionFlags::OneOrMore;
 }
 
+bool OptionDescription::isZeroOrMore() const {
+  return Flags & OptionDescriptionFlags::ZeroOrMore;
+}
+void OptionDescription::setZeroOrMore() {
+  Flags |= OptionDescriptionFlags::ZeroOrMore;
+}
+
 bool OptionDescription::isOptional() const {
   return Flags & OptionDescriptionFlags::Optional;
 }
@@ -593,6 +621,7 @@ public:
       AddHandler("init", &CollectOptionProperties::onInit);
       AddHandler("multi_val", &CollectOptionProperties::onMultiVal);
       AddHandler("one_or_more", &CollectOptionProperties::onOneOrMore);
+      AddHandler("zero_or_more", &CollectOptionProperties::onZeroOrMore);
       AddHandler("really_hidden", &CollectOptionProperties::onReallyHidden);
       AddHandler("required", &CollectOptionProperties::onRequired);
       AddHandler("optional", &CollectOptionProperties::onOptional);
@@ -651,10 +680,9 @@ private:
 
   void onRequired (const DagInit& d) {
     CheckNumberOfArguments(d, 0);
-    if (optDesc_.isOneOrMore() || optDesc_.isOptional())
-      throw "Only one of (required), (optional) or "
-        "(one_or_more) properties is allowed!";
+
     optDesc_.setRequired();
+    optDesc_.CheckConsistency();
   }
 
   void onInit (const DagInit& d) {
@@ -673,24 +701,31 @@ private:
 
   void onOneOrMore (const DagInit& d) {
     CheckNumberOfArguments(d, 0);
-    if (optDesc_.isRequired() || optDesc_.isOptional())
-      throw "Only one of (required), (optional) or "
-        "(one_or_more) properties is allowed!";
-    if (!OptionType::IsList(optDesc_.Type))
-      llvm::errs() << "Warning: specifying the 'one_or_more' property "
-        "on a non-list option will have no effect.\n";
+
     optDesc_.setOneOrMore();
+    optDesc_.CheckConsistency();
+  }
+
+  void onZeroOrMore (const DagInit& d) {
+    CheckNumberOfArguments(d, 0);
+
+    if (OptionType::IsList(optDesc_.Type))
+      llvm::errs() << "Warning: specifying the 'zero_or_more' property "
+        "on a list option has no effect.\n";
+
+    optDesc_.setZeroOrMore();
+    optDesc_.CheckConsistency();
   }
 
   void onOptional (const DagInit& d) {
     CheckNumberOfArguments(d, 0);
-    if (optDesc_.isRequired() || optDesc_.isOneOrMore())
-      throw "Only one of (required), (optional) or "
-        "(one_or_more) properties is allowed!";
+
     if (!OptionType::IsList(optDesc_.Type))
       llvm::errs() << "Warning: specifying the 'optional' property"
-        "on a non-list option will have no effect.\n";
+        " on a non-list option has no effect.\n";
+
     optDesc_.setOptional();
+    optDesc_.CheckConsistency();
   }
 
   void onMultiVal (const DagInit& d) {
@@ -2323,12 +2358,15 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
       else
         O << ", cl::Required";
     }
-    else if (val.isOneOrMore() && val.isList()) {
-        O << ", cl::OneOrMore";
-    }
-    else if (val.isOptional() && val.isList()) {
+
+    if (val.isOptional())
         O << ", cl::Optional";
-    }
+
+    if (val.isOneOrMore())
+        O << ", cl::OneOrMore";
+
+    if (val.isZeroOrMore())
+        O << ", cl::ZeroOrMore";
 
     if (val.isReallyHidden())
       O << ", cl::ReallyHidden";
diff --git a/utils/lit/ExampleTests.ObjDir/lit.site.cfg b/utils/lit/ExampleTests.ObjDir/lit.site.cfg
deleted file mode 100644
index 14b6e0134139..000000000000
--- a/utils/lit/ExampleTests.ObjDir/lit.site.cfg
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- Python -*-
-
-# Site specific configuration file.
-#
-# Typically this will be generated by the build system to automatically set
-# certain configuration variables which cannot be autodetected, so that 'lit'
-# can easily be used on the command line.
-
-import os
-
-# Preserve the obj_root, for use by the main lit.cfg.
-config.example_obj_root = os.path.dirname(__file__)
-
-lit.load_config(config, os.path.join(config.test_source_root,
-                                     'lit.cfg'))
diff --git a/utils/lit/ExampleTests/Clang/fsyntax-only.c b/utils/lit/ExampleTests/Clang/fsyntax-only.c
deleted file mode 100644
index a4a064ba0cf1..000000000000
--- a/utils/lit/ExampleTests/Clang/fsyntax-only.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// RUN: clang -fsyntax-only -Xclang -verify %s
-
-int f0(void) {} // expected-warning {{control reaches end of non-void function}}
-
diff --git a/utils/lit/ExampleTests/Clang/lit.cfg b/utils/lit/ExampleTests/Clang/lit.cfg
deleted file mode 100644
index 114ac60de640..000000000000
--- a/utils/lit/ExampleTests/Clang/lit.cfg
+++ /dev/null
@@ -1,80 +0,0 @@
-# -*- Python -*-
-
-# Configuration file for the 'lit' test runner.
-
-# name: The name of this test suite.
-config.name = 'Clang'
-
-# testFormat: The test format to use to interpret tests.
-#
-# For now we require '&&' between commands, until they get globally killed and
-# the test runner updated.
-config.test_format = lit.formats.ShTest(execute_external = True)
-
-# suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.c', '.cpp', '.m', '.mm']
-
-# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
-config.target_triple = 'foo'
-
-###
-
-# Discover the 'clang' and 'clangcc' to use.
-
-import os
-
-def inferClang(PATH):
-    # Determine which clang to use.
-    clang = os.getenv('CLANG')
-
-    # If the user set clang in the environment, definitely use that and don't
-    # try to validate.
-    if clang:
-        return clang
-
-    # Otherwise look in the path.
-    clang = lit.util.which('clang', PATH)
-
-    if not clang:
-        lit.fatal("couldn't find 'clang' program, try setting "
-                  "CLANG in your environment")
-
-    return clang
-
-def inferClangCC(clang, PATH):
-    clangcc = os.getenv('CLANGCC')
-
-    # If the user set clang in the environment, definitely use that and don't
-    # try to validate.
-    if clangcc:
-        return clangcc
-
-    # Otherwise try adding -cc since we expect to be looking in a build
-    # directory.
-    if clang.endswith('.exe'):
-        clangccName = clang[:-4] + '-cc.exe'
-    else:
-        clangccName = clang + '-cc'
-    clangcc = lit.util.which(clangccName, PATH)
-    if not clangcc:
-        # Otherwise ask clang.
-        res = lit.util.capture([clang, '-print-prog-name=clang-cc'])
-        res = res.strip()
-        if res and os.path.exists(res):
-            clangcc = res
-
-    if not clangcc:
-        lit.fatal("couldn't find 'clang-cc' program, try setting "
-                  "CLANGCC in your environment")
-
-    return clangcc
-
-clang = inferClang(config.environment['PATH'])
-if not lit.quiet:
-    lit.note('using clang: %r' % clang)
-config.substitutions.append( (' clang ', ' ' + clang + ' ') )
-
-clang_cc = inferClangCC(clang, config.environment['PATH'])
-if not lit.quiet:
-    lit.note('using clang-cc: %r' % clang_cc)
-config.substitutions.append( (' clang-cc ', ' ' + clang_cc + ' ') )
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
deleted file mode 100644
index 3017b13e48c0..000000000000
--- a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: true
-; XFAIL: *
-; XTARGET: darwin
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp b/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
deleted file mode 100644
index 2bda07a31cfb..000000000000
--- a/utils/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
-
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg b/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg
deleted file mode 100644
index e7ef037663a3..000000000000
--- a/utils/lit/ExampleTests/LLVM.InTree/test/lit.cfg
+++ /dev/null
@@ -1,151 +0,0 @@
-# -*- Python -*-
-
-# Configuration file for the 'lit' test runner.
-
-import os
-
-# name: The name of this test suite.
-config.name = 'LLVM'
-
-# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.TclTest()
-
-# suffixes: A list of file extensions to treat as test files, this is actually
-# set by on_clone().
-config.suffixes = []
-
-# test_source_root: The root path where tests are located.
-config.test_source_root = os.path.dirname(__file__)
-
-# test_exec_root: The root path where tests should be run.
-llvm_obj_root = getattr(config, 'llvm_obj_root', None)
-if llvm_obj_root is not None:
-    config.test_exec_root = os.path.join(llvm_obj_root, 'test')
-
-###
-
-import os
-
-# Check that the object root is known.
-if config.test_exec_root is None:
-    # Otherwise, we haven't loaded the site specific configuration (the user is
-    # probably trying to run on a test file directly, and either the site
-    # configuration hasn't been created by the build system, or we are in an
-    # out-of-tree build situation).
-
-    # Try to detect the situation where we are using an out-of-tree build by
-    # looking for 'llvm-config'.
-    #
-    # FIXME: I debated (i.e., wrote and threw away) adding logic to
-    # automagically generate the lit.site.cfg if we are in some kind of fresh
-    # build situation. This means knowing how to invoke the build system
-    # though, and I decided it was too much magic.
-
-    llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
-    if not llvm_config:
-        lit.fatal('No site specific configuration available!')
-
-    # Get the source and object roots.
-    llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
-    llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
-
-    # Validate that we got a tree which points to here.
-    this_src_root = os.path.dirname(config.test_source_root)
-    if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
-        lit.fatal('No site specific configuration available!')
-
-    # Check that the site specific configuration exists.
-    site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
-    if not os.path.exists(site_cfg):
-        lit.fatal('No site specific configuration available!')
-
-    # Okay, that worked. Notify the user of the automagic, and reconfigure.
-    lit.note('using out-of-tree build at %r' % llvm_obj_root)
-    lit.load_config(config, site_cfg)
-    raise SystemExit
-
-###
-
-# Load site data from DejaGNU's site.exp.
-import re
-site_exp = {}
-# FIXME: Implement lit.site.cfg.
-for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
-    m = re.match('set ([^ ]+) "([^"]*)"', line)
-    if m:
-        site_exp[m.group(1)] = m.group(2)
-
-# Add substitutions.
-for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
-            'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
-            'bugpoint_topts']:
-    if sub in ('llvmgcc', 'llvmgxx'):
-        config.substitutions.append(('%' + sub,
-                                     site_exp[sub] + ' -emit-llvm -w'))
-    else:
-        config.substitutions.append(('%' + sub, site_exp[sub]))
-
-excludes = []
-
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
-    return name in targets
-
-langs = set(site_exp['llvmgcc_langs'].split(','))
-def llvm_gcc_supports(name):
-    return name in langs
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
-                           re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
-    def addSuffixes(match):
-        if match[0] == '{' and match[-1] == '}':
-            cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
-        else:
-            cfg.suffixes = ['.' + match]
-
-    libPath = os.path.join(os.path.dirname(for_path),
-                           'dg.exp')
-    if not os.path.exists(libPath):
-        cfg.unsupported = True
-        return
-
-    # Reset unsupported, in case we inherited it.
-    cfg.unsupported = False
-    lib = open(libPath).read().strip()
-
-    # Check for a simple library.
-    m = simpleLibData.match(lib)
-    if m:
-        addSuffixes(m.group(1))
-        return
-
-    # Check for a conditional test set.
-    m = conditionalLibData.match(lib)
-    if m:
-        funcname,arg,match = m.groups()
-        addSuffixes(match)
-
-        func = globals().get(funcname)
-        if not func:
-            lit.error('unsupported predicate %r' % funcname)
-        elif not func(arg):
-            cfg.unsupported = True
-        return
-    # Otherwise, give up.
-    lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg b/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
deleted file mode 100644
index 3bfee547b7e3..000000000000
--- a/utils/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- Python -*-
-
-## Autogenerated by Makefile ##
-# Do not edit!
-
-# Preserve some key paths for use by main LLVM test suite config.
-config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
-
-# Let the main config do the real work.
-lit.load_config(config, os.path.join(config.llvm_obj_root, 'test/lit.cfg'))
diff --git a/utils/lit/ExampleTests/LLVM.InTree/test/site.exp b/utils/lit/ExampleTests/LLVM.InTree/test/site.exp
deleted file mode 100644
index 1d9c74326841..000000000000
--- a/utils/lit/ExampleTests/LLVM.InTree/test/site.exp
+++ /dev/null
@@ -1,30 +0,0 @@
-## these variables are automatically generated by make ##
-# Do not edit here.  If you wish to override these values
-# edit the last section
-set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
-set llvmgcc_langs "c,c++,objc,obj-c++"
-set llvmgcc_version "4.2.1"
-set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
-set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
-set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
-set srcroot "/Volumes/Data/ddunbar/llvm"
-set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
-set srcdir "/Volumes/Data/ddunbar/llvm/test"
-set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-set gccpath "/usr/bin/gcc -arch x86_64"
-set gxxpath "/usr/bin/g++ -arch x86_64"
-set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
-set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgccmajvers "4"
-set bugpoint_topts "-gcc-tool-args -m64"
-set shlibext ".dylib"
-set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
-set valgrind ""
-set grep "/usr/bin/grep"
-set gas "/usr/bin/as"
-set llvmdsymutil "dsymutil"
-## All variables above are generated by configure. Do Not Edit ## 
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg
deleted file mode 100644
index 80d0c7ead6b7..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.excludes = ['src']
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg
+++ /dev/null
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
deleted file mode 100644
index bdcc35e0938c..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-# -*- Python -*-
-
-## Autogenerated by Makefile ##
-# Do not edit!
-
-# Preserve some key paths for use by main LLVM test suite config.
-config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
-
-# Let the main config do the real work.
-lit.load_config(config, os.path.join(config.llvm_obj_root,
-                                     '../src/test/lit.cfg'))
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
deleted file mode 100644
index 1d9c74326841..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
+++ /dev/null
@@ -1,30 +0,0 @@
-## these variables are automatically generated by make ##
-# Do not edit here.  If you wish to override these values
-# edit the last section
-set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
-set llvmgcc_langs "c,c++,objc,obj-c++"
-set llvmgcc_version "4.2.1"
-set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
-set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
-set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
-set srcroot "/Volumes/Data/ddunbar/llvm"
-set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
-set srcdir "/Volumes/Data/ddunbar/llvm/test"
-set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-set gccpath "/usr/bin/gcc -arch x86_64"
-set gxxpath "/usr/bin/g++ -arch x86_64"
-set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
-set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgccmajvers "4"
-set bugpoint_topts "-gcc-tool-args -m64"
-set shlibext ".dylib"
-set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
-set valgrind ""
-set grep "/usr/bin/grep"
-set gas "/usr/bin/as"
-set llvmdsymutil "dsymutil"
-## All variables above are generated by configure. Do Not Edit ## 
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt
deleted file mode 100644
index 45b983be36b7..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt
+++ /dev/null
@@ -1 +0,0 @@
-hi
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
deleted file mode 100644
index 2bda07a31cfb..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
-
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
deleted file mode 100644
index 4e8a58205569..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
+++ /dev/null
@@ -1 +0,0 @@
-; RUN: grep "hi" %S/data.txt
-\ No newline at end of file
diff --git a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg b/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
deleted file mode 100644
index e7ef037663a3..000000000000
--- a/utils/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
+++ /dev/null
@@ -1,151 +0,0 @@
-# -*- Python -*-
-
-# Configuration file for the 'lit' test runner.
-
-import os
-
-# name: The name of this test suite.
-config.name = 'LLVM'
-
-# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.TclTest()
-
-# suffixes: A list of file extensions to treat as test files, this is actually
-# set by on_clone().
-config.suffixes = []
-
-# test_source_root: The root path where tests are located.
-config.test_source_root = os.path.dirname(__file__)
-
-# test_exec_root: The root path where tests should be run.
-llvm_obj_root = getattr(config, 'llvm_obj_root', None)
-if llvm_obj_root is not None:
-    config.test_exec_root = os.path.join(llvm_obj_root, 'test')
-
-###
-
-import os
-
-# Check that the object root is known.
-if config.test_exec_root is None:
-    # Otherwise, we haven't loaded the site specific configuration (the user is
-    # probably trying to run on a test file directly, and either the site
-    # configuration hasn't been created by the build system, or we are in an
-    # out-of-tree build situation).
-
-    # Try to detect the situation where we are using an out-of-tree build by
-    # looking for 'llvm-config'.
-    #
-    # FIXME: I debated (i.e., wrote and threw away) adding logic to
-    # automagically generate the lit.site.cfg if we are in some kind of fresh
-    # build situation. This means knowing how to invoke the build system
-    # though, and I decided it was too much magic.
-
-    llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
-    if not llvm_config:
-        lit.fatal('No site specific configuration available!')
-
-    # Get the source and object roots.
-    llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
-    llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip()
-
-    # Validate that we got a tree which points to here.
-    this_src_root = os.path.dirname(config.test_source_root)
-    if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
-        lit.fatal('No site specific configuration available!')
-
-    # Check that the site specific configuration exists.
-    site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
-    if not os.path.exists(site_cfg):
-        lit.fatal('No site specific configuration available!')
-
-    # Okay, that worked. Notify the user of the automagic, and reconfigure.
-    lit.note('using out-of-tree build at %r' % llvm_obj_root)
-    lit.load_config(config, site_cfg)
-    raise SystemExit
-
-###
-
-# Load site data from DejaGNU's site.exp.
-import re
-site_exp = {}
-# FIXME: Implement lit.site.cfg.
-for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
-    m = re.match('set ([^ ]+) "([^"]*)"', line)
-    if m:
-        site_exp[m.group(1)] = m.group(2)
-
-# Add substitutions.
-for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
-            'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
-            'bugpoint_topts']:
-    if sub in ('llvmgcc', 'llvmgxx'):
-        config.substitutions.append(('%' + sub,
-                                     site_exp[sub] + ' -emit-llvm -w'))
-    else:
-        config.substitutions.append(('%' + sub, site_exp[sub]))
-
-excludes = []
-
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
-    return name in targets
-
-langs = set(site_exp['llvmgcc_langs'].split(','))
-def llvm_gcc_supports(name):
-    return name in langs
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
-                           re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
-    def addSuffixes(match):
-        if match[0] == '{' and match[-1] == '}':
-            cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
-        else:
-            cfg.suffixes = ['.' + match]
-
-    libPath = os.path.join(os.path.dirname(for_path),
-                           'dg.exp')
-    if not os.path.exists(libPath):
-        cfg.unsupported = True
-        return
-
-    # Reset unsupported, in case we inherited it.
-    cfg.unsupported = False
-    lib = open(libPath).read().strip()
-
-    # Check for a simple library.
-    m = simpleLibData.match(lib)
-    if m:
-        addSuffixes(m.group(1))
-        return
-
-    # Check for a conditional test set.
-    m = conditionalLibData.match(lib)
-    if m:
-        funcname,arg,match = m.groups()
-        addSuffixes(match)
-
-        func = globals().get(funcname)
-        if not func:
-            lit.error('unsupported predicate %r' % funcname)
-        elif not func(arg):
-            cfg.unsupported = True
-        return
-    # Otherwise, give up.
-    lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
diff --git a/utils/lit/ExampleTests/ShExternal/lit.local.cfg b/utils/lit/ExampleTests/ShExternal/lit.local.cfg
deleted file mode 100644
index 1061da62fd34..000000000000
--- a/utils/lit/ExampleTests/ShExternal/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- Python -*-
-
-config.test_format = lit.formats.ShTest(execute_external = True)
-
-config.suffixes = ['.c']
-
diff --git a/utils/lit/ExampleTests/ShInternal/lit.local.cfg b/utils/lit/ExampleTests/ShInternal/lit.local.cfg
deleted file mode 100644
index 448eaa4092b6..000000000000
--- a/utils/lit/ExampleTests/ShInternal/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- Python -*-
-
-config.test_format = lit.formats.ShTest(execute_external = False)
-
-config.suffixes = ['.c']
-
diff --git a/utils/lit/ExampleTests/TclTest/lit.local.cfg b/utils/lit/ExampleTests/TclTest/lit.local.cfg
deleted file mode 100644
index 6a37129acdf1..000000000000
--- a/utils/lit/ExampleTests/TclTest/lit.local.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- Python -*-
-
-config.test_format = lit.formats.TclTest()
-
-config.suffixes = ['.ll']
diff --git a/utils/lit/ExampleTests/TclTest/stderr-pipe.ll b/utils/lit/ExampleTests/TclTest/stderr-pipe.ll
deleted file mode 100644
index 6c55fe8a0b17..000000000000
--- a/utils/lit/ExampleTests/TclTest/stderr-pipe.ll
+++ /dev/null
@@ -1 +0,0 @@
-; RUN: gcc -### > /dev/null |& grep {gcc version}
diff --git a/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll b/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll
deleted file mode 100644
index 61240ba45941..000000000000
--- a/utils/lit/ExampleTests/TclTest/tcl-redir-1.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: echo 'hi' > %t.1 | echo 'hello' > %t.2
-; RUN: not grep 'hi' %t.1
-; RUN: grep 'hello' %t.2
-
-
-
-
diff --git a/utils/lit/ExampleTests/fail.c b/utils/lit/ExampleTests/fail.c
deleted file mode 100644
index 84db41a5889e..000000000000
--- a/utils/lit/ExampleTests/fail.c
+++ /dev/null
@@ -1,2 +0,0 @@
-// RUN: echo 'I am some stdout'
-// RUN: false
diff --git a/utils/lit/ExampleTests/lit.cfg b/utils/lit/ExampleTests/lit.cfg
deleted file mode 100644
index dbd574f8bd10..000000000000
--- a/utils/lit/ExampleTests/lit.cfg
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- Python -*-
-
-# Configuration file for the 'lit' test runner.
-
-# name: The name of this test suite.
-config.name = 'Examples'
-
-# suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.c', '.cpp', '.m', '.mm', '.ll']
-
-# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.ShTest()
-
-# test_source_root: The path where tests are located (default is the test suite
-# root).
-config.test_source_root = None
-
-# test_exec_root: The path where tests are located (default is the test suite
-# root).
-config.test_exec_root = None
-
-# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
-config.target_triple = 'foo'
diff --git a/utils/lit/ExampleTests/pass.c b/utils/lit/ExampleTests/pass.c
deleted file mode 100644
index 5c1031cccc41..000000000000
--- a/utils/lit/ExampleTests/pass.c
+++ /dev/null
@@ -1 +0,0 @@
-// RUN: true
diff --git a/utils/lit/ExampleTests/xfail.c b/utils/lit/ExampleTests/xfail.c
deleted file mode 100644
index b36cd99a3000..000000000000
--- a/utils/lit/ExampleTests/xfail.c
+++ /dev/null
@@ -1,2 +0,0 @@
-// RUN: false
-// XFAIL: *
diff --git a/utils/lit/ExampleTests/xpass.c b/utils/lit/ExampleTests/xpass.c
deleted file mode 100644
index ad84990f7e22..000000000000
--- a/utils/lit/ExampleTests/xpass.c
+++ /dev/null
@@ -1,2 +0,0 @@
-// RUN: true
-// XFAIL
diff --git a/utils/lit/LitConfig.py b/utils/lit/LitConfig.py
deleted file mode 100644
index 0e0a4931dca7..000000000000
--- a/utils/lit/LitConfig.py
+++ /dev/null
@@ -1,95 +0,0 @@
-class LitConfig:
-    """LitConfig - Configuration data for a 'lit' test runner instance, shared
-    across all tests.
-
-    The LitConfig object is also used to communicate with client configuration
-    files, it is always passed in as the global variable 'lit' so that
-    configuration files can access common functionality and internal components
-    easily.
-    """
-
-    # Provide access to built-in formats.
-    import LitFormats as formats
-
-    # Provide access to built-in utility functions.
-    import Util as util
-
-    def __init__(self, progname, path, quiet,
-                 useValgrind, valgrindArgs,
-                 useTclAsSh,
-                 noExecute, debug, isWindows,
-                 params):
-        # The name of the test runner.
-        self.progname = progname
-        # The items to add to the PATH environment variable.
-        self.path = list(map(str, path))
-        self.quiet = bool(quiet)
-        self.useValgrind = bool(useValgrind)
-        self.valgrindArgs = list(valgrindArgs)
-        self.useTclAsSh = bool(useTclAsSh)
-        self.noExecute = noExecute
-        self.debug = debug
-        self.isWindows = bool(isWindows)
-        self.params = dict(params)
-        self.bashPath = None
-
-        self.numErrors = 0
-        self.numWarnings = 0
-
-    def load_config(self, config, path):
-        """load_config(config, path) - Load a config object from an alternate
-        path."""
-        from TestingConfig import TestingConfig
-        return TestingConfig.frompath(path, config.parent, self,
-                                      mustExist = True,
-                                      config = config)
-
-    def getBashPath(self):
-        """getBashPath - Get the path to 'bash'"""
-        import os, Util
-
-        if self.bashPath is not None:
-            return self.bashPath
-
-        self.bashPath = Util.which('bash', os.pathsep.join(self.path))
-        if self.bashPath is None:
-            # Check some known paths.
-            for path in ('/bin/bash', '/usr/bin/bash'):
-                if os.path.exists(path):
-                    self.bashPath = path
-                    break
-
-        if self.bashPath is None:
-            self.warning("Unable to find 'bash', running Tcl tests internally.")
-            self.bashPath = ''
-
-        return self.bashPath
-
-    def _write_message(self, kind, message):
-        import inspect, os, sys
-
-        # Get the file/line where this message was generated.
-        f = inspect.currentframe()
-        # Step out of _write_message, and then out of wrapper.
-        f = f.f_back.f_back
-        file,line,_,_,_ = inspect.getframeinfo(f)
-        location = '%s:%d' % (os.path.basename(file), line)
-
-        print >>sys.stderr, '%s: %s: %s: %s' % (self.progname, location,
-                                                kind, message)
-
-    def note(self, message):
-        self._write_message('note', message)
-
-    def warning(self, message):
-        self._write_message('warning', message)
-        self.numWarnings += 1
-
-    def error(self, message):
-        self._write_message('error', message)
-        self.numErrors += 1
-
-    def fatal(self, message):
-        import sys
-        self._write_message('fatal', message)
-        sys.exit(2)
diff --git a/utils/lit/LitFormats.py b/utils/lit/LitFormats.py
deleted file mode 100644
index 270f08707041..000000000000
--- a/utils/lit/LitFormats.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from TestFormats import GoogleTest, ShTest, TclTest
-from TestFormats import SyntaxCheckTest, OneCommandPerFileTest
-
diff --git a/utils/lit/ProgressBar.py b/utils/lit/ProgressBar.py
deleted file mode 100644
index 85c95f57f7ac..000000000000
--- a/utils/lit/ProgressBar.py
+++ /dev/null
@@ -1,267 +0,0 @@
-#!/usr/bin/env python
-
-# Source: http://code.activestate.com/recipes/475116/, with
-# modifications by Daniel Dunbar.
-
-import sys, re, time
-
-class TerminalController:
-    """
-    A class that can be used to portably generate formatted output to
-    a terminal.  
-    
-    `TerminalController` defines a set of instance variables whose
-    values are initialized to the control sequence necessary to
-    perform a given action.  These can be simply included in normal
-    output to the terminal:
-
-        >>> term = TerminalController()
-        >>> print 'This is '+term.GREEN+'green'+term.NORMAL
-
-    Alternatively, the `render()` method can used, which replaces
-    '${action}' with the string required to perform 'action':
-
-        >>> term = TerminalController()
-        >>> print term.render('This is ${GREEN}green${NORMAL}')
-
-    If the terminal doesn't support a given action, then the value of
-    the corresponding instance variable will be set to ''.  As a
-    result, the above code will still work on terminals that do not
-    support color, except that their output will not be colored.
-    Also, this means that you can test whether the terminal supports a
-    given action by simply testing the truth value of the
-    corresponding instance variable:
-
-        >>> term = TerminalController()
-        >>> if term.CLEAR_SCREEN:
-        ...     print 'This terminal supports clearning the screen.'
-
-    Finally, if the width and height of the terminal are known, then
-    they will be stored in the `COLS` and `LINES` attributes.
-    """
-    # Cursor movement:
-    BOL = ''             #: Move the cursor to the beginning of the line
-    UP = ''              #: Move the cursor up one line
-    DOWN = ''            #: Move the cursor down one line
-    LEFT = ''            #: Move the cursor left one char
-    RIGHT = ''           #: Move the cursor right one char
-
-    # Deletion:
-    CLEAR_SCREEN = ''    #: Clear the screen and move to home position
-    CLEAR_EOL = ''       #: Clear to the end of the line.
-    CLEAR_BOL = ''       #: Clear to the beginning of the line.
-    CLEAR_EOS = ''       #: Clear to the end of the screen
-
-    # Output modes:
-    BOLD = ''            #: Turn on bold mode
-    BLINK = ''           #: Turn on blink mode
-    DIM = ''             #: Turn on half-bright mode
-    REVERSE = ''         #: Turn on reverse-video mode
-    NORMAL = ''          #: Turn off all modes
-
-    # Cursor display:
-    HIDE_CURSOR = ''     #: Make the cursor invisible
-    SHOW_CURSOR = ''     #: Make the cursor visible
-
-    # Terminal size:
-    COLS = None          #: Width of the terminal (None for unknown)
-    LINES = None         #: Height of the terminal (None for unknown)
-
-    # Foreground colors:
-    BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''
-    
-    # Background colors:
-    BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
-    BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''
-    
-    _STRING_CAPABILITIES = """
-    BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
-    CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
-    BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0
-    HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split()
-    _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
-    _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()
-
-    def __init__(self, term_stream=sys.stdout):
-        """
-        Create a `TerminalController` and initialize its attributes
-        with appropriate values for the current terminal.
-        `term_stream` is the stream that will be used for terminal
-        output; if this stream is not a tty, then the terminal is
-        assumed to be a dumb terminal (i.e., have no capabilities).
-        """
-        # Curses isn't available on all platforms
-        try: import curses
-        except: return
-
-        # If the stream isn't a tty, then assume it has no capabilities.
-        if not term_stream.isatty(): return
-
-        # Check the terminal type.  If we fail, then assume that the
-        # terminal has no capabilities.
-        try: curses.setupterm()
-        except: return
-
-        # Look up numeric capabilities.
-        self.COLS = curses.tigetnum('cols')
-        self.LINES = curses.tigetnum('lines')
-        
-        # Look up string capabilities.
-        for capability in self._STRING_CAPABILITIES:
-            (attrib, cap_name) = capability.split('=')
-            setattr(self, attrib, self._tigetstr(cap_name) or '')
-
-        # Colors
-        set_fg = self._tigetstr('setf')
-        if set_fg:
-            for i,color in zip(range(len(self._COLORS)), self._COLORS):
-                setattr(self, color, curses.tparm(set_fg, i) or '')
-        set_fg_ansi = self._tigetstr('setaf')
-        if set_fg_ansi:
-            for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
-                setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
-        set_bg = self._tigetstr('setb')
-        if set_bg:
-            for i,color in zip(range(len(self._COLORS)), self._COLORS):
-                setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '')
-        set_bg_ansi = self._tigetstr('setab')
-        if set_bg_ansi:
-            for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
-                setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')
-
-    def _tigetstr(self, cap_name):
-        # String capabilities can include "delays" of the form "$<2>".
-        # For any modern terminal, we should be able to just ignore
-        # these, so strip them out.
-        import curses
-        cap = curses.tigetstr(cap_name) or ''
-        return re.sub(r'\$<\d+>[/*]?', '', cap)
-
-    def render(self, template):
-        """
-        Replace each $-substitutions in the given template string with
-        the corresponding terminal control string (if it's defined) or
-        '' (if it's not).
-        """
-        return re.sub(r'\$\$|\${\w+}', self._render_sub, template)
-
-    def _render_sub(self, match):
-        s = match.group()
-        if s == '$$': return s
-        else: return getattr(self, s[2:-1])
-
-#######################################################################
-# Example use case: progress bar
-#######################################################################
-
-class SimpleProgressBar:
-    """
-    A simple progress bar which doesn't need any terminal support.
-
-    This prints out a progress bar like:
-      'Header: 0 .. 10.. 20.. ...'
-    """
-
-    def __init__(self, header):
-        self.header = header
-        self.atIndex = None
-
-    def update(self, percent, message):
-        if self.atIndex is None:
-            sys.stdout.write(self.header)
-            self.atIndex = 0
-
-        next = int(percent*50)
-        if next == self.atIndex:
-            return
-
-        for i in range(self.atIndex, next):
-            idx = i % 5
-            if idx == 0:
-                sys.stdout.write('%-2d' % (i*2))
-            elif idx == 1:
-                pass # Skip second char
-            elif idx < 4:
-                sys.stdout.write('.')
-            else:
-                sys.stdout.write(' ')
-        sys.stdout.flush()
-        self.atIndex = next
-
-    def clear(self):
-        if self.atIndex is not None:
-            sys.stdout.write('\n')
-            sys.stdout.flush()
-            self.atIndex = None
-
-class ProgressBar:
-    """
-    A 3-line progress bar, which looks like::
-    
-                                Header
-        20% [===========----------------------------------]
-                           progress message
-
-    The progress bar is colored, if the terminal supports color
-    output; and adjusts to the width of the terminal.
-    """
-    BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n'
-    HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
-        
-    def __init__(self, term, header, useETA=True):
-        self.term = term
-        if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
-            raise ValueError("Terminal isn't capable enough -- you "
-                             "should use a simpler progress dispaly.")
-        self.width = self.term.COLS or 75
-        self.bar = term.render(self.BAR)
-        self.header = self.term.render(self.HEADER % header.center(self.width))
-        self.cleared = 1 #: true if we haven't drawn the bar yet.
-        self.useETA = useETA
-        if self.useETA:
-            self.startTime = time.time()
-        self.update(0, '')
-
-    def update(self, percent, message):
-        if self.cleared:
-            sys.stdout.write(self.header)
-            self.cleared = 0
-        prefix = '%3d%% ' % (percent*100,)
-        suffix = ''
-        if self.useETA:
-            elapsed = time.time() - self.startTime
-            if percent > .0001 and elapsed > 1:
-                total = elapsed / percent
-                eta = int(total - elapsed)
-                h = eta//3600.
-                m = (eta//60) % 60
-                s = eta % 60
-                suffix = ' ETA: %02d:%02d:%02d'%(h,m,s)
-        barWidth = self.width - len(prefix) - len(suffix) - 2
-        n = int(barWidth*percent)
-        if len(message) < self.width:
-            message = message + ' '*(self.width - len(message))
-        else:
-            message = '... ' + message[-(self.width-4):]
-        sys.stdout.write(
-            self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
-            (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) +
-            self.term.CLEAR_EOL + message)
-
-    def clear(self):
-        if not self.cleared:
-            sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
-                             self.term.UP + self.term.CLEAR_EOL +
-                             self.term.UP + self.term.CLEAR_EOL)
-            self.cleared = 1
-
-def test():
-    import time
-    tc = TerminalController()
-    p = ProgressBar(tc, 'Tests')
-    for i in range(101):
-        p.update(i/100., str(i))        
-        time.sleep(.3)
-
-if __name__=='__main__':
-    test()
diff --git a/utils/lit/ShCommands.py b/utils/lit/ShCommands.py
deleted file mode 100644
index 4550437ce227..000000000000
--- a/utils/lit/ShCommands.py
+++ /dev/null
@@ -1,85 +0,0 @@
-class Command:
-    def __init__(self, args, redirects):
-        self.args = list(args)
-        self.redirects = list(redirects)
-
-    def __repr__(self):
-        return 'Command(%r, %r)' % (self.args, self.redirects)
-
-    def __cmp__(self, other):
-        if not isinstance(other, Command):
-            return -1
-
-        return cmp((self.args, self.redirects),
-                   (other.args, other.redirects))
-
-    def toShell(self, file):
-        for arg in self.args:
-            if "'" not in arg:
-                quoted = "'%s'" % arg
-            elif '"' not in arg and '$' not in arg:
-                quoted = '"%s"' % arg
-            else:
-                raise NotImplementedError,'Unable to quote %r' % arg
-            print >>file, quoted,
-
-            # For debugging / validation.
-            import ShUtil
-            dequoted = list(ShUtil.ShLexer(quoted).lex())
-            if dequoted != [arg]:
-                raise NotImplementedError,'Unable to quote %r' % arg
-
-        for r in self.redirects:
-            if len(r[0]) == 1:
-                print >>file, "%s '%s'" % (r[0][0], r[1]),
-            else:
-                print >>file, "%s%s '%s'" % (r[0][1], r[0][0], r[1]),
-
-class Pipeline:
-    def __init__(self, commands, negate=False, pipe_err=False):
-        self.commands = commands
-        self.negate = negate
-        self.pipe_err = pipe_err
-
-    def __repr__(self):
-        return 'Pipeline(%r, %r, %r)' % (self.commands, self.negate,
-                                         self.pipe_err)
-
-    def __cmp__(self, other):
-        if not isinstance(other, Pipeline):
-            return -1
-
-        return cmp((self.commands, self.negate, self.pipe_err),
-                   (other.commands, other.negate, self.pipe_err))
-
-    def toShell(self, file, pipefail=False):
-        if pipefail != self.pipe_err:
-            raise ValueError,'Inconsistent "pipefail" attribute!'
-        if self.negate:
-            print >>file, '!',
-        for cmd in self.commands:
-            cmd.toShell(file)
-            if cmd is not self.commands[-1]:
-                print >>file, '|\n ',
-
-class Seq:
-    def __init__(self, lhs, op, rhs):
-        assert op in (';', '&', '||', '&&')
-        self.op = op
-        self.lhs = lhs
-        self.rhs = rhs
-
-    def __repr__(self):
-        return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
-
-    def __cmp__(self, other):
-        if not isinstance(other, Seq):
-            return -1
-
-        return cmp((self.lhs, self.op, self.rhs),
-                   (other.lhs, other.op, other.rhs))
-
-    def toShell(self, file, pipefail=False):
-        self.lhs.toShell(file, pipefail)
-        print >>file, ' %s\n' % self.op
-        self.rhs.toShell(file, pipefail)
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py
deleted file mode 100644
index c4bbb3d3731d..000000000000
--- a/utils/lit/ShUtil.py
+++ /dev/null
@@ -1,346 +0,0 @@
-import itertools
-
-import Util
-from ShCommands import Command, Pipeline, Seq
-
-class ShLexer:
-    def __init__(self, data, win32Escapes = False):
-        self.data = data
-        self.pos = 0
-        self.end = len(data)
-        self.win32Escapes = win32Escapes
-
-    def eat(self):
-        c = self.data[self.pos]
-        self.pos += 1
-        return c
-
-    def look(self):
-        return self.data[self.pos]
-
-    def maybe_eat(self, c):
-        """
-        maybe_eat(c) - Consume the character c if it is the next character,
-        returning True if a character was consumed. """
-        if self.data[self.pos] == c:
-            self.pos += 1
-            return True
-        return False
-
-    def lex_arg_fast(self, c):
-        # Get the leading whitespace free section.
-        chunk = self.data[self.pos - 1:].split(None, 1)[0]
-        
-        # If it has special characters, the fast path failed.
-        if ('|' in chunk or '&' in chunk or 
-            '<' in chunk or '>' in chunk or
-            "'" in chunk or '"' in chunk or
-            '\\' in chunk):
-            return None
-        
-        self.pos = self.pos - 1 + len(chunk)
-        return chunk
-        
-    def lex_arg_slow(self, c):
-        if c in "'\"":
-            str = self.lex_arg_quoted(c)
-        else:
-            str = c
-        while self.pos != self.end:
-            c = self.look()
-            if c.isspace() or c in "|&":
-                break
-            elif c in '><':
-                # This is an annoying case; we treat '2>' as a single token so
-                # we don't have to track whitespace tokens.
-
-                # If the parse string isn't an integer, do the usual thing.
-                if not str.isdigit():
-                    break
-
-                # Otherwise, lex the operator and convert to a redirection
-                # token.
-                num = int(str)
-                tok = self.lex_one_token()
-                assert isinstance(tok, tuple) and len(tok) == 1
-                return (tok[0], num)                    
-            elif c == '"':
-                self.eat()
-                str += self.lex_arg_quoted('"')
-            elif not self.win32Escapes and c == '\\':
-                # Outside of a string, '\\' escapes everything.
-                self.eat()
-                if self.pos == self.end:
-                    Util.warning("escape at end of quoted argument in: %r" % 
-                                 self.data)
-                    return str
-                str += self.eat()
-            else:
-                str += self.eat()
-        return str
-
-    def lex_arg_quoted(self, delim):
-        str = ''
-        while self.pos != self.end:
-            c = self.eat()
-            if c == delim:
-                return str
-            elif c == '\\' and delim == '"':
-                # Inside a '"' quoted string, '\\' only escapes the quote
-                # character and backslash, otherwise it is preserved.
-                if self.pos == self.end:
-                    Util.warning("escape at end of quoted argument in: %r" % 
-                                 self.data)
-                    return str
-                c = self.eat()
-                if c == '"': # 
-                    str += '"'
-                elif c == '\\':
-                    str += '\\'
-                else:
-                    str += '\\' + c
-            else:
-                str += c
-        Util.warning("missing quote character in %r" % self.data)
-        return str
-    
-    def lex_arg_checked(self, c):
-        pos = self.pos
-        res = self.lex_arg_fast(c)
-        end = self.pos
-
-        self.pos = pos
-        reference = self.lex_arg_slow(c)
-        if res is not None:
-            if res != reference:
-                raise ValueError,"Fast path failure: %r != %r" % (res, reference)
-            if self.pos != end:
-                raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
-        return reference
-        
-    def lex_arg(self, c):
-        return self.lex_arg_fast(c) or self.lex_arg_slow(c)
-        
-    def lex_one_token(self):
-        """
-        lex_one_token - Lex a single 'sh' token. """
-
-        c = self.eat()
-        if c in ';!':
-            return (c,)
-        if c == '|':
-            if self.maybe_eat('|'):
-                return ('||',)
-            return (c,)
-        if c == '&':
-            if self.maybe_eat('&'):
-                return ('&&',)
-            if self.maybe_eat('>'): 
-                return ('&>',)
-            return (c,)
-        if c == '>':
-            if self.maybe_eat('&'):
-                return ('>&',)
-            if self.maybe_eat('>'):
-                return ('>>',)
-            return (c,)
-        if c == '<':
-            if self.maybe_eat('&'):
-                return ('<&',)
-            if self.maybe_eat('>'):
-                return ('<<',)
-            return (c,)
-
-        return self.lex_arg(c)
-
-    def lex(self):
-        while self.pos != self.end:
-            if self.look().isspace():
-                self.eat()
-            else:
-                yield self.lex_one_token()
-
-###
- 
-class ShParser:
-    def __init__(self, data, win32Escapes = False):
-        self.data = data
-        self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
-    
-    def lex(self):
-        try:
-            return self.tokens.next()
-        except StopIteration:
-            return None
-    
-    def look(self):
-        next = self.lex()
-        if next is not None:
-            self.tokens = itertools.chain([next], self.tokens)
-        return next
-    
-    def parse_command(self):
-        tok = self.lex()
-        if not tok:
-            raise ValueError,"empty command!"
-        if isinstance(tok, tuple):
-            raise ValueError,"syntax error near unexpected token %r" % tok[0]
-        
-        args = [tok]
-        redirects = []
-        while 1:
-            tok = self.look()
-
-            # EOF?
-            if tok is None:
-                break
-
-            # If this is an argument, just add it to the current command.
-            if isinstance(tok, str):
-                args.append(self.lex())
-                continue
-
-            # Otherwise see if it is a terminator.
-            assert isinstance(tok, tuple)
-            if tok[0] in ('|',';','&','||','&&'):
-                break
-            
-            # Otherwise it must be a redirection.
-            op = self.lex()
-            arg = self.lex()
-            if not arg:
-                raise ValueError,"syntax error near token %r" % op[0]
-            redirects.append((op, arg))
-
-        return Command(args, redirects)
-
-    def parse_pipeline(self):
-        negate = False
-        if self.look() == ('!',):
-            self.lex()
-            negate = True
-
-        commands = [self.parse_command()]
-        while self.look() == ('|',):
-            self.lex()
-            commands.append(self.parse_command())
-        return Pipeline(commands, negate)
-            
-    def parse(self):
-        lhs = self.parse_pipeline()
-
-        while self.look():
-            operator = self.lex()
-            assert isinstance(operator, tuple) and len(operator) == 1
-
-            if not self.look():
-                raise ValueError, "missing argument to operator %r" % operator[0]
-            
-            # FIXME: Operator precedence!!
-            lhs = Seq(lhs, operator[0], self.parse_pipeline())
-
-        return lhs
-
-###
-
-import unittest
-
-class TestShLexer(unittest.TestCase):
-    def lex(self, str, *args, **kwargs):
-        return list(ShLexer(str, *args, **kwargs).lex())
-
-    def test_basic(self):
-        self.assertEqual(self.lex('a|b>c&d<e'),
-                         ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', 
-                          ('<',), 'e'])
-
-    def test_redirection_tokens(self):
-        self.assertEqual(self.lex('a2>c'),
-                         ['a2', ('>',), 'c'])
-        self.assertEqual(self.lex('a 2>c'),
-                         ['a', ('>',2), 'c'])
-        
-    def test_quoting(self):
-        self.assertEqual(self.lex(""" 'a' """),
-                         ['a'])
-        self.assertEqual(self.lex(""" "hello\\"world" """),
-                         ['hello"world'])
-        self.assertEqual(self.lex(""" "hello\\'world" """),
-                         ["hello\\'world"])
-        self.assertEqual(self.lex(""" "hello\\\\world" """),
-                         ["hello\\world"])
-        self.assertEqual(self.lex(""" he"llo wo"rld """),
-                         ["hello world"])
-        self.assertEqual(self.lex(""" a\\ b a\\\\b """),
-                         ["a b", "a\\b"])
-        self.assertEqual(self.lex(""" "" "" """),
-                         ["", ""])
-        self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
-                         ['a\\', 'b'])
-
-class TestShParse(unittest.TestCase):
-    def parse(self, str):
-        return ShParser(str).parse()
-
-    def test_basic(self):
-        self.assertEqual(self.parse('echo hello'),
-                         Pipeline([Command(['echo', 'hello'], [])], False))
-        self.assertEqual(self.parse('echo ""'),
-                         Pipeline([Command(['echo', ''], [])], False))
-
-    def test_redirection(self):
-        self.assertEqual(self.parse('echo hello > c'),
-                         Pipeline([Command(['echo', 'hello'], 
-                                           [((('>'),), 'c')])], False))
-        self.assertEqual(self.parse('echo hello > c >> d'),
-                         Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
-                                                     (('>>',), 'd')])], False))
-        self.assertEqual(self.parse('a 2>&1'),
-                         Pipeline([Command(['a'], [(('>&',2), '1')])], False))
-
-    def test_pipeline(self):
-        self.assertEqual(self.parse('a | b'),
-                         Pipeline([Command(['a'], []),
-                                   Command(['b'], [])],
-                                  False))
-
-        self.assertEqual(self.parse('a | b | c'),
-                         Pipeline([Command(['a'], []),
-                                   Command(['b'], []),
-                                   Command(['c'], [])],
-                                  False))
-
-        self.assertEqual(self.parse('! a'),
-                         Pipeline([Command(['a'], [])],
-                                  True))
-
-    def test_list(self):        
-        self.assertEqual(self.parse('a ; b'),
-                         Seq(Pipeline([Command(['a'], [])], False),
-                             ';',
-                             Pipeline([Command(['b'], [])], False)))
-
-        self.assertEqual(self.parse('a & b'),
-                         Seq(Pipeline([Command(['a'], [])], False),
-                             '&',
-                             Pipeline([Command(['b'], [])], False)))
-
-        self.assertEqual(self.parse('a && b'),
-                         Seq(Pipeline([Command(['a'], [])], False),
-                             '&&',
-                             Pipeline([Command(['b'], [])], False)))
-
-        self.assertEqual(self.parse('a || b'),
-                         Seq(Pipeline([Command(['a'], [])], False),
-                             '||',
-                             Pipeline([Command(['b'], [])], False)))
-
-        self.assertEqual(self.parse('a && b || c'),
-                         Seq(Seq(Pipeline([Command(['a'], [])], False),
-                                 '&&',
-                                 Pipeline([Command(['b'], [])], False)),
-                             '||',
-                             Pipeline([Command(['c'], [])], False)))
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/utils/lit/TclUtil.py b/utils/lit/TclUtil.py
deleted file mode 100644
index 4a3f34508d64..000000000000
--- a/utils/lit/TclUtil.py
+++ /dev/null
@@ -1,322 +0,0 @@
-import itertools
-
-from ShCommands import Command, Pipeline
-
-def tcl_preprocess(data):
-    # Tcl has a preprocessing step to replace escaped newlines.
-    i = data.find('\\\n')
-    if i == -1:
-        return data
-
-    # Replace '\\\n' and subsequent whitespace by a single space.
-    n = len(data)
-    str = data[:i]
-    i += 2
-    while i < n and data[i] in ' \t':
-        i += 1
-    return str + ' ' + data[i:]
-
-class TclLexer:
-    """TclLexer - Lex a string into "words", following the Tcl syntax."""
-
-    def __init__(self, data):
-        self.data = tcl_preprocess(data)
-        self.pos = 0
-        self.end = len(self.data)
-
-    def at_end(self):
-        return self.pos == self.end
-
-    def eat(self):
-        c = self.data[self.pos]
-        self.pos += 1
-        return c
-
-    def look(self):
-        return self.data[self.pos]
-
-    def maybe_eat(self, c):
-        """
-        maybe_eat(c) - Consume the character c if it is the next character,
-        returning True if a character was consumed. """
-        if self.data[self.pos] == c:
-            self.pos += 1
-            return True
-        return False
-
-    def escape(self, c):
-        if c == 'a':
-            return '\x07'
-        elif c == 'b':
-            return '\x08'
-        elif c == 'f':
-            return '\x0c'
-        elif c == 'n':
-            return '\n'
-        elif c == 'r':
-            return '\r'
-        elif c == 't':
-            return '\t'
-        elif c == 'v':
-            return '\x0b'
-        elif c in 'uxo':
-            raise ValueError,'Invalid quoted character %r' % c
-        else:
-            return c
-        
-    def lex_braced(self):
-        # Lex until whitespace or end of string, the opening brace has already
-        # been consumed.
-
-        str = ''        
-        while 1:
-            if self.at_end():
-                raise ValueError,"Unterminated '{' quoted word"
-            
-            c = self.eat()
-            if c == '}':
-                break
-            elif c == '{':
-                str += '{' + self.lex_braced() + '}'
-            elif c == '\\' and self.look() in '{}':
-                str += self.eat()
-            else:
-                str += c
-
-        return str
-
-    def lex_quoted(self):
-        str = ''
-
-        while 1:
-            if self.at_end():
-                raise ValueError,"Unterminated '\"' quoted word"
-            
-            c = self.eat()
-            if c == '"':
-                break
-            elif c == '\\':
-                if self.at_end():
-                    raise ValueError,'Missing quoted character'
-
-                str += self.escape(self.eat())
-            else:
-                str += c
-
-        return str
-
-    def lex_unquoted(self, process_all=False):
-        # Lex until whitespace or end of string.
-        str = ''
-        while not self.at_end():
-            if not process_all:
-                if self.look().isspace() or self.look() == ';':
-                    break
-
-            c = self.eat()
-            if c == '\\':
-                if self.at_end():
-                    raise ValueError,'Missing quoted character'
-
-                str += self.escape(self.eat())
-            elif c == '[':
-                raise NotImplementedError, ('Command substitution is '
-                                            'not supported')
-            elif c == '$' and not self.at_end() and (self.look().isalpha() or
-                                                     self.look() == '{'):
-                raise NotImplementedError, ('Variable substitution is '
-                                            'not supported')
-            else:
-                str += c
-
-        return str
-
-    def lex_one_token(self):
-        if self.maybe_eat('"'):
-            return self.lex_quoted()
-        elif self.maybe_eat('{'):
-            # Check for argument substitution.
-            if not self.maybe_eat('*'):
-                return self.lex_braced()
-
-            if not self.maybe_eat('}'):
-                    return '*' + self.lex_braced()
-                
-            if self.at_end() or self.look().isspace():
-                return '*'
-
-            raise NotImplementedError, "Argument substitution is unsupported"
-        else:
-            return self.lex_unquoted()
-
-    def lex(self):
-        while not self.at_end():
-            c = self.look()
-            if c in ' \t':
-                self.eat()
-            elif c in ';\n':
-                self.eat()
-                yield (';',)
-            else:
-                yield self.lex_one_token()
-
-class TclExecCommand:
-    kRedirectPrefixes1 = ('<', '>')
-    kRedirectPrefixes2 = ('<@', '<<', '2>', '>&', '>>', '>@')
-    kRedirectPrefixes3 = ('2>@', '2>>', '>>&', '>&@')
-    kRedirectPrefixes4 = ('2>@1',)
-
-    def __init__(self, args):
-        self.args = iter(args)
-
-    def lex(self):
-        try:
-            return self.args.next()
-        except StopIteration:
-            return None
-
-    def look(self):
-        next = self.lex()
-        if next is not None:
-            self.args = itertools.chain([next], self.args)
-        return next
-
-    def parse_redirect(self, tok, length):
-        if len(tok) == length:
-            arg = self.lex()
-            if arg is None:
-                raise ValueError,'Missing argument to %r redirection' % tok
-        else:
-            tok,arg = tok[:length],tok[length:]
-
-        if tok[0] == '2':
-            op = (tok[1:],2)
-        else:
-            op = (tok,)
-        return (op, arg)
-
-    def parse_pipeline(self):
-        if self.look() is None:
-            raise ValueError,"Expected at least one argument to exec"
-
-        commands = [Command([],[])]
-        while 1:
-            arg = self.lex()
-            if arg is None:
-                break
-            elif arg == '|':
-                commands.append(Command([],[]))
-            elif arg == '|&':
-                # Write this as a redirect of stderr; it must come first because
-                # stdout may have already been redirected.
-                commands[-1].redirects.insert(0, (('>&',2),'1'))
-                commands.append(Command([],[]))
-            elif arg[:4] in TclExecCommand.kRedirectPrefixes4:
-                commands[-1].redirects.append(self.parse_redirect(arg, 4))
-            elif arg[:3] in TclExecCommand.kRedirectPrefixes3:
-                commands[-1].redirects.append(self.parse_redirect(arg, 3))
-            elif arg[:2] in TclExecCommand.kRedirectPrefixes2:
-                commands[-1].redirects.append(self.parse_redirect(arg, 2))
-            elif arg[:1] in TclExecCommand.kRedirectPrefixes1:
-                commands[-1].redirects.append(self.parse_redirect(arg, 1))
-            else:
-                commands[-1].args.append(arg)
-
-        return Pipeline(commands, False, pipe_err=True)
-
-    def parse(self):
-        ignoreStderr = False
-        keepNewline = False
-
-        # Parse arguments.
-        while 1:
-            next = self.look()
-            if not isinstance(next, str) or next[0] != '-':
-                break
-
-            if next == '--':
-                self.lex()
-                break
-            elif next == '-ignorestderr':
-                ignoreStderr = True
-            elif next == '-keepnewline':
-                keepNewline = True
-            else:
-                raise ValueError,"Invalid exec argument %r" % next
-
-        return (ignoreStderr, keepNewline, self.parse_pipeline())
-
-###
-
-import unittest
-
-class TestTclLexer(unittest.TestCase):
-    def lex(self, str, *args, **kwargs):
-        return list(TclLexer(str, *args, **kwargs).lex())
-
-    def test_preprocess(self):
-        self.assertEqual(tcl_preprocess('a b'), 'a b')
-        self.assertEqual(tcl_preprocess('a\\\nb c'), 'a b c')
-
-    def test_unquoted(self):
-        self.assertEqual(self.lex('a b c'),
-                         ['a', 'b', 'c'])
-        self.assertEqual(self.lex(r'a\nb\tc\ '),
-                         ['a\nb\tc '])
-        self.assertEqual(self.lex(r'a \\\$b c $\\'),
-                         ['a', r'\$b', 'c', '$\\'])
-
-    def test_braced(self):
-        self.assertEqual(self.lex('a {b c} {}'),
-                         ['a', 'b c', ''])
-        self.assertEqual(self.lex(r'a {b {c\n}}'),
-                         ['a', 'b {c\\n}'])
-        self.assertEqual(self.lex(r'a {b\{}'),
-                         ['a', 'b{'])
-        self.assertEqual(self.lex(r'{*}'), ['*'])
-        self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
-        self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
-        self.assertEqual(self.lex('{a\\\n   b}'),
-                         ['a b'])
-
-    def test_quoted(self):
-        self.assertEqual(self.lex('a "b c"'),
-                         ['a', 'b c'])
-
-    def test_terminators(self):
-        self.assertEqual(self.lex('a\nb'),
-                         ['a', (';',), 'b'])
-        self.assertEqual(self.lex('a;b'),
-                         ['a', (';',), 'b'])
-        self.assertEqual(self.lex('a   ;   b'),
-                         ['a', (';',), 'b'])
-
-class TestTclExecCommand(unittest.TestCase):
-    def parse(self, str):
-        return TclExecCommand(list(TclLexer(str).lex())).parse()
-
-    def test_basic(self):
-        self.assertEqual(self.parse('echo hello'),
-                         (False, False,
-                          Pipeline([Command(['echo', 'hello'], [])],
-                                   False, True)))
-        self.assertEqual(self.parse('echo hello | grep hello'),
-                         (False, False,
-                          Pipeline([Command(['echo', 'hello'], []),
-                                    Command(['grep', 'hello'], [])],
-                                   False, True)))
-
-    def test_redirect(self):
-        self.assertEqual(self.parse('echo hello > a >b >>c 2> d |& e'),
-                         (False, False,
-                          Pipeline([Command(['echo', 'hello'],
-                                            [(('>&',2),'1'),
-                                             (('>',),'a'),
-                                             (('>',),'b'),
-                                             (('>>',),'c'),
-                                             (('>',2),'d')]),
-                                    Command(['e'], [])],
-                                   False, True)))
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/utils/lit/Test.py b/utils/lit/Test.py
deleted file mode 100644
index 1f6556ba8595..000000000000
--- a/utils/lit/Test.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-
-# Test results.
-
-class TestResult:
-    def __init__(self, name, isFailure):
-        self.name = name
-        self.isFailure = isFailure
-
-PASS        = TestResult('PASS', False)
-XFAIL       = TestResult('XFAIL', False)
-FAIL        = TestResult('FAIL', True)
-XPASS       = TestResult('XPASS', True)
-UNRESOLVED  = TestResult('UNRESOLVED', True)
-UNSUPPORTED = TestResult('UNSUPPORTED', False)
-
-# Test classes.
-
-class TestFormat:
-    """TestFormat - Test information provider."""
-
-    def __init__(self, name):
-        self.name = name
-
-class TestSuite:
-    """TestSuite - Information on a group of tests.
-
-    A test suite groups together a set of logically related tests.
-    """
-
-    def __init__(self, name, source_root, exec_root, config):
-        self.name = name
-        self.source_root = source_root
-        self.exec_root = exec_root
-        # The test suite configuration.
-        self.config = config
-
-    def getSourcePath(self, components):
-        return os.path.join(self.source_root, *components)
-
-    def getExecPath(self, components):
-        return os.path.join(self.exec_root, *components)
-
-class Test:
-    """Test - Information on a single test instance."""
-
-    def __init__(self, suite, path_in_suite, config):
-        self.suite = suite
-        self.path_in_suite = path_in_suite
-        self.config = config
-        # The test result code, once complete.
-        self.result = None
-        # Any additional output from the test, once complete.
-        self.output = None
-        # The wall time to execute this test, if timing and once complete.
-        self.elapsed = None
-        # The repeat index of this test, or None.
-        self.index = None
-
-    def copyWithIndex(self, index):
-        import copy
-        res = copy.copy(self)
-        res.index = index
-        return res
-
-    def setResult(self, result, output, elapsed):
-        assert self.result is None, "Test result already set!"
-        self.result = result
-        self.output = output
-        self.elapsed = elapsed
-
-    def getFullName(self):
-        return self.suite.config.name + '::' + '/'.join(self.path_in_suite)
-
-    def getSourcePath(self):
-        return self.suite.getSourcePath(self.path_in_suite)
-
-    def getExecPath(self):
-        return self.suite.getExecPath(self.path_in_suite)
diff --git a/utils/lit/TestFormats.py b/utils/lit/TestFormats.py
deleted file mode 100644
index 5dfd54ac5ec0..000000000000
--- a/utils/lit/TestFormats.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import os
-
-import Test
-import TestRunner
-import Util
-
-class GoogleTest(object):
-    def __init__(self, test_sub_dir, test_suffix):
-        self.test_sub_dir = str(test_sub_dir)
-        self.test_suffix = str(test_suffix)
-
-    def getGTestTests(self, path, litConfig):
-        """getGTestTests(path) - [name]
-        
-        Return the tests available in gtest executable."""
-
-        try:
-            lines = Util.capture([path, '--gtest_list_tests']).split('\n')
-        except:
-            litConfig.error("unable to discover google-tests in %r" % path)
-            raise StopIteration
-
-        nested_tests = []
-        for ln in lines:
-            if not ln.strip():
-                continue
-
-            prefix = ''
-            index = 0
-            while ln[index*2:index*2+2] == '  ':
-                index += 1
-            while len(nested_tests) > index:
-                nested_tests.pop()
-            
-            ln = ln[index*2:]
-            if ln.endswith('.'):
-                nested_tests.append(ln)
-            else:
-                yield ''.join(nested_tests) + ln
-
-    def getTestsInDirectory(self, testSuite, path_in_suite,
-                            litConfig, localConfig):
-        source_path = testSuite.getSourcePath(path_in_suite)
-        for filename in os.listdir(source_path):
-            # Check for the one subdirectory (build directory) tests will be in.
-            if filename != self.test_sub_dir:
-                continue
-
-            filepath = os.path.join(source_path, filename)
-            for subfilename in os.listdir(filepath):
-                if subfilename.endswith(self.test_suffix):
-                    execpath = os.path.join(filepath, subfilename)
-
-                    # Discover the tests in this executable.
-                    for name in self.getGTestTests(execpath, litConfig):
-                        testPath = path_in_suite + (filename, subfilename, name)
-                        yield Test.Test(testSuite, testPath, localConfig)
-
-    def execute(self, test, litConfig):
-        testPath,testName = os.path.split(test.getSourcePath())
-        while not os.path.exists(testPath):
-            # Handle GTest parametrized and typed tests, whose name includes
-            # some '/'s.
-            testPath, namePrefix = os.path.split(testPath)
-            testName = os.path.join(namePrefix, testName)
-
-        cmd = [testPath, '--gtest_filter=' + testName]
-        out, err, exitCode = TestRunner.executeCommand(cmd)
-            
-        if not exitCode:
-            return Test.PASS,''
-
-        return Test.FAIL, out + err
-
-###
-
-class FileBasedTest(object):
-    def getTestsInDirectory(self, testSuite, path_in_suite,
-                            litConfig, localConfig):
-        source_path = testSuite.getSourcePath(path_in_suite)
-        for filename in os.listdir(source_path):
-            filepath = os.path.join(source_path, filename)
-            if not os.path.isdir(filepath):
-                base,ext = os.path.splitext(filename)
-                if ext in localConfig.suffixes:
-                    yield Test.Test(testSuite, path_in_suite + (filename,),
-                                    localConfig)
-
-class ShTest(FileBasedTest):
-    def __init__(self, execute_external = False):
-        self.execute_external = execute_external
-
-    def execute(self, test, litConfig):
-        return TestRunner.executeShTest(test, litConfig,
-                                        self.execute_external)
-
-class TclTest(FileBasedTest):
-    def execute(self, test, litConfig):
-        return TestRunner.executeTclTest(test, litConfig)
-
-###
-
-import re
-import tempfile
-
-class OneCommandPerFileTest:
-    # FIXME: Refactor into generic test for running some command on a directory
-    # of inputs.
-
-    def __init__(self, command, dir, recursive=False,
-                 pattern=".*", useTempInput=False):
-        if isinstance(command, str):
-            self.command = [command]
-        else:
-            self.command = list(command)
-        self.dir = str(dir)
-        self.recursive = bool(recursive)
-        self.pattern = re.compile(pattern)
-        self.useTempInput = useTempInput
-
-    def getTestsInDirectory(self, testSuite, path_in_suite,
-                            litConfig, localConfig):
-        for dirname,subdirs,filenames in os.walk(self.dir):
-            if not self.recursive:
-                subdirs[:] = []
-
-            subdirs[:] = [d for d in subdirs
-                          if (d != '.svn' and
-                              d not in localConfig.excludes)]
-
-            for filename in filenames:
-                if (not self.pattern.match(filename) or
-                    filename in localConfig.excludes):
-                    continue
-
-                path = os.path.join(dirname,filename)
-                suffix = path[len(self.dir):]
-                if suffix.startswith(os.sep):
-                    suffix = suffix[1:]
-                test = Test.Test(testSuite,
-                                 path_in_suite + tuple(suffix.split(os.sep)),
-                                 localConfig)
-                # FIXME: Hack?
-                test.source_path = path
-                yield test
-
-    def createTempInput(self, tmp, test):
-        abstract
-
-    def execute(self, test, litConfig):
-        if test.config.unsupported:
-            return (Test.UNSUPPORTED, 'Test is unsupported')
-
-        cmd = list(self.command)
-
-        # If using temp input, create a temporary file and hand it to the
-        # subclass.
-        if self.useTempInput:
-            tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
-            self.createTempInput(tmp, test)
-            tmp.flush()
-            cmd.append(tmp.name)
-        else:
-            cmd.append(test.source_path)
-
-        out, err, exitCode = TestRunner.executeCommand(cmd)
-
-        diags = out + err
-        if not exitCode and not diags.strip():
-            return Test.PASS,''
-
-        # Try to include some useful information.
-        report = """Command: %s\n""" % ' '.join(["'%s'" % a
-                                                 for a in cmd])
-        if self.useTempInput:
-            report += """Temporary File: %s\n""" % tmp.name
-            report += "--\n%s--\n""" % open(tmp.name).read()
-        report += """Output:\n--\n%s--""" % diags
-
-        return Test.FAIL, report
-
-class SyntaxCheckTest(OneCommandPerFileTest):
-    def __init__(self, compiler, dir, extra_cxx_args=[], *args, **kwargs):
-        cmd = [compiler, '-x', 'c++', '-fsyntax-only'] + extra_cxx_args
-        OneCommandPerFileTest.__init__(self, cmd, dir,
-                                       useTempInput=1, *args, **kwargs)
-
-    def createTempInput(self, tmp, test):
-        print >>tmp, '#include "%s"' % test.source_path
diff --git a/utils/lit/TestRunner.py b/utils/lit/TestRunner.py
deleted file mode 100644
index 20fbc6c13a9f..000000000000
--- a/utils/lit/TestRunner.py
+++ /dev/null
@@ -1,517 +0,0 @@
-import os, signal, subprocess, sys
-import StringIO
-
-import ShUtil
-import Test
-import Util
-
-import platform
-import tempfile
-
-class InternalShellError(Exception):
-    def __init__(self, command, message):
-        self.command = command
-        self.message = message
-
-# Don't use close_fds on Windows.
-kUseCloseFDs = platform.system() != 'Windows'
-
-# Use temporary files to replace /dev/null on Windows.
-kAvoidDevNull = platform.system() == 'Windows'
-
-def executeCommand(command, cwd=None, env=None):
-    p = subprocess.Popen(command, cwd=cwd,
-                         stdin=subprocess.PIPE,
-                         stdout=subprocess.PIPE,
-                         stderr=subprocess.PIPE,
-                         env=env)
-    out,err = p.communicate()
-    exitCode = p.wait()
-
-    # Detect Ctrl-C in subprocess.
-    if exitCode == -signal.SIGINT:
-        raise KeyboardInterrupt
-
-    return out, err, exitCode
-
-def executeShCmd(cmd, cfg, cwd, results):
-    if isinstance(cmd, ShUtil.Seq):
-        if cmd.op == ';':
-            res = executeShCmd(cmd.lhs, cfg, cwd, results)
-            return executeShCmd(cmd.rhs, cfg, cwd, results)
-
-        if cmd.op == '&':
-            raise NotImplementedError,"unsupported test command: '&'"
-
-        if cmd.op == '||':
-            res = executeShCmd(cmd.lhs, cfg, cwd, results)
-            if res != 0:
-                res = executeShCmd(cmd.rhs, cfg, cwd, results)
-            return res
-        if cmd.op == '&&':
-            res = executeShCmd(cmd.lhs, cfg, cwd, results)
-            if res is None:
-                return res
-
-            if res == 0:
-                res = executeShCmd(cmd.rhs, cfg, cwd, results)
-            return res
-
-        raise ValueError,'Unknown shell command: %r' % cmd.op
-
-    assert isinstance(cmd, ShUtil.Pipeline)
-    procs = []
-    input = subprocess.PIPE
-    stderrTempFiles = []
-    # To avoid deadlock, we use a single stderr stream for piped
-    # output. This is null until we have seen some output using
-    # stderr.
-    for i,j in enumerate(cmd.commands):
-        # Apply the redirections, we use (N,) as a sentinal to indicate stdin,
-        # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
-        # from a file are represented with a list [file, mode, file-object]
-        # where file-object is initially None.
-        redirects = [(0,), (1,), (2,)]
-        for r in j.redirects:
-            if r[0] == ('>',2):
-                redirects[2] = [r[1], 'w', None]
-            elif r[0] == ('>>',2):
-                redirects[2] = [r[1], 'a', None]
-            elif r[0] == ('>&',2) and r[1] in '012':
-                redirects[2] = redirects[int(r[1])]
-            elif r[0] == ('>&',) or r[0] == ('&>',):
-                redirects[1] = redirects[2] = [r[1], 'w', None]
-            elif r[0] == ('>',):
-                redirects[1] = [r[1], 'w', None]
-            elif r[0] == ('>>',):
-                redirects[1] = [r[1], 'a', None]
-            elif r[0] == ('<',):
-                redirects[0] = [r[1], 'r', None]
-            else:
-                raise NotImplementedError,"Unsupported redirect: %r" % (r,)
-
-        # Map from the final redirections to something subprocess can handle.
-        final_redirects = []
-        for index,r in enumerate(redirects):
-            if r == (0,):
-                result = input
-            elif r == (1,):
-                if index == 0:
-                    raise NotImplementedError,"Unsupported redirect for stdin"
-                elif index == 1:
-                    result = subprocess.PIPE
-                else:
-                    result = subprocess.STDOUT
-            elif r == (2,):
-                if index != 2:
-                    raise NotImplementedError,"Unsupported redirect on stdout"
-                result = subprocess.PIPE
-            else:
-                if r[2] is None:
-                    if kAvoidDevNull and r[0] == '/dev/null':
-                        r[2] = tempfile.TemporaryFile(mode=r[1])
-                    else:
-                        r[2] = open(r[0], r[1])
-                    # Workaround a Win32 and/or subprocess bug when appending.
-                    if r[1] == 'a':
-                        r[2].seek(0, 2)
-                result = r[2]
-            final_redirects.append(result)
-
-        stdin, stdout, stderr = final_redirects
-
-        # If stderr wants to come from stdout, but stdout isn't a pipe, then put
-        # stderr on a pipe and treat it as stdout.
-        if (stderr == subprocess.STDOUT and stdout != subprocess.PIPE):
-            stderr = subprocess.PIPE
-            stderrIsStdout = True
-        else:
-            stderrIsStdout = False
-
-            # Don't allow stderr on a PIPE except for the last
-            # process, this could deadlock.
-            #
-            # FIXME: This is slow, but so is deadlock.
-            if stderr == subprocess.PIPE and j != cmd.commands[-1]:
-                stderr = tempfile.TemporaryFile(mode='w+b')
-                stderrTempFiles.append((i, stderr))
-
-        # Resolve the executable path ourselves.
-        args = list(j.args)
-        args[0] = Util.which(args[0], cfg.environment['PATH'])
-        if not args[0]:
-            raise InternalShellError(j, '%r: command not found' % j.args[0])
-
-        procs.append(subprocess.Popen(args, cwd=cwd,
-                                      stdin = stdin,
-                                      stdout = stdout,
-                                      stderr = stderr,
-                                      env = cfg.environment,
-                                      close_fds = kUseCloseFDs))
-
-        # Immediately close stdin for any process taking stdin from us.
-        if stdin == subprocess.PIPE:
-            procs[-1].stdin.close()
-            procs[-1].stdin = None
-
-        # Update the current stdin source.
-        if stdout == subprocess.PIPE:
-            input = procs[-1].stdout
-        elif stderrIsStdout:
-            input = procs[-1].stderr
-        else:
-            input = subprocess.PIPE
-
-    # FIXME: There is probably still deadlock potential here. Yawn.
-    procData = [None] * len(procs)
-    procData[-1] = procs[-1].communicate()
-
-    for i in range(len(procs) - 1):
-        if procs[i].stdout is not None:
-            out = procs[i].stdout.read()
-        else:
-            out = ''
-        if procs[i].stderr is not None:
-            err = procs[i].stderr.read()
-        else:
-            err = ''
-        procData[i] = (out,err)
-        
-    # Read stderr out of the temp files.
-    for i,f in stderrTempFiles:
-        f.seek(0, 0)
-        procData[i] = (procData[i][0], f.read())
-
-    exitCode = None
-    for i,(out,err) in enumerate(procData):
-        res = procs[i].wait()
-        # Detect Ctrl-C in subprocess.
-        if res == -signal.SIGINT:
-            raise KeyboardInterrupt
-
-        results.append((cmd.commands[i], out, err, res))
-        if cmd.pipe_err:
-            # Python treats the exit code as a signed char.
-            if res < 0:
-                exitCode = min(exitCode, res)
-            else:
-                exitCode = max(exitCode, res)
-        else:
-            exitCode = res
-
-    if cmd.negate:
-        exitCode = not exitCode
-
-    return exitCode
-
-def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
-    ln = ' &&\n'.join(commands)
-    try:
-        cmd = ShUtil.ShParser(ln, litConfig.isWindows).parse()
-    except:
-        return (Test.FAIL, "shell parser error on: %r" % ln)
-
-    results = []
-    try:
-        exitCode = executeShCmd(cmd, test.config, cwd, results)
-    except InternalShellError,e:
-        out = ''
-        err = e.message
-        exitCode = 255
-
-    out = err = ''
-    for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
-        out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
-        out += 'Command %d Result: %r\n' % (i, res)
-        out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
-        out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
-
-    return out, err, exitCode
-
-def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
-    import TclUtil
-    cmds = []
-    for ln in commands:
-        # Given the unfortunate way LLVM's test are written, the line gets
-        # backslash substitution done twice.
-        ln = TclUtil.TclLexer(ln).lex_unquoted(process_all = True)
-
-        try:
-            tokens = list(TclUtil.TclLexer(ln).lex())
-        except:
-            return (Test.FAIL, "Tcl lexer error on: %r" % ln)
-
-        # Validate there are no control tokens.
-        for t in tokens:
-            if not isinstance(t, str):
-                return (Test.FAIL,
-                        "Invalid test line: %r containing %r" % (ln, t))
-
-        try:
-            cmds.append(TclUtil.TclExecCommand(tokens).parse_pipeline())
-        except:
-            return (Test.FAIL, "Tcl 'exec' parse error on: %r" % ln)
-
-    cmd = cmds[0]
-    for c in cmds[1:]:
-        cmd = ShUtil.Seq(cmd, '&&', c)
-
-    # FIXME: This is lame, we shouldn't need bash. See PR5240.
-    bashPath = litConfig.getBashPath()
-    if litConfig.useTclAsSh and bashPath:
-        script = tmpBase + '.script'
-
-        # Write script file
-        f = open(script,'w')
-        print >>f, 'set -o pipefail'
-        cmd.toShell(f, pipefail = True)
-        f.close()
-
-        if 0:
-            print >>sys.stdout, cmd
-            print >>sys.stdout, open(script).read()
-            print >>sys.stdout
-            return '', '', 0
-
-        command = [litConfig.getBashPath(), script]
-        out,err,exitCode = executeCommand(command, cwd=cwd,
-                                          env=test.config.environment)
-
-        # Tcl commands fail on standard error output.
-        if err:
-            exitCode = 1
-            out = 'Command has output on stderr!\n\n' + out
-
-        return out,err,exitCode
-    else:
-        results = []
-        try:
-            exitCode = executeShCmd(cmd, test.config, cwd, results)
-        except InternalShellError,e:
-            results.append((e.command, '', e.message + '\n', 255))
-            exitCode = 255
-
-    out = err = ''
-
-    # Tcl commands fail on standard error output.
-    if [True for _,_,err,res in results if err]:
-        exitCode = 1
-        out += 'Command has output on stderr!\n\n'
-
-    for i,(cmd, cmd_out, cmd_err, res) in enumerate(results):
-        out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
-        out += 'Command %d Result: %r\n' % (i, res)
-        out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
-        out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
-
-    return out, err, exitCode
-
-def executeScript(test, litConfig, tmpBase, commands, cwd):
-    script = tmpBase + '.script'
-    if litConfig.isWindows:
-        script += '.bat'
-
-    # Write script file
-    f = open(script,'w')
-    if litConfig.isWindows:
-        f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
-    else:
-        f.write(' &&\n'.join(commands))
-    f.write('\n')
-    f.close()
-
-    if litConfig.isWindows:
-        command = ['cmd','/c', script]
-    else:
-        command = ['/bin/sh', script]
-        if litConfig.useValgrind:
-            # FIXME: Running valgrind on sh is overkill. We probably could just
-            # run on clang with no real loss.
-            valgrindArgs = ['valgrind', '-q',
-                            '--tool=memcheck', '--trace-children=yes',
-                            '--error-exitcode=123']
-            valgrindArgs.extend(litConfig.valgrindArgs)
-
-            command = valgrindArgs + command
-
-    return executeCommand(command, cwd=cwd, env=test.config.environment)
-
-def isExpectedFail(xfails, xtargets, target_triple):
-    # Check if any xfail matches this target.
-    for item in xfails:
-        if item == '*' or item in target_triple:
-            break
-    else:
-        return False
-
-    # If so, see if it is expected to pass on this target.
-    #
-    # FIXME: Rename XTARGET to something that makes sense, like XPASS.
-    for item in xtargets:
-        if item == '*' or item in target_triple:
-            return False
-
-    return True
-
-def parseIntegratedTestScript(test):
-    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
-    script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
-    information. The RUN lines also will have variable substitution performed.
-    """
-
-    # Get the temporary location, this is always relative to the test suite
-    # root, not test source root.
-    #
-    # FIXME: This should not be here?
-    sourcepath = test.getSourcePath()
-    execpath = test.getExecPath()
-    execdir,execbase = os.path.split(execpath)
-    tmpBase = os.path.join(execdir, 'Output', execbase)
-    if test.index is not None:
-        tmpBase += '_%d' % test.index
-
-    # We use #_MARKER_# to hide %% while we do the other substitutions.
-    substitutions = [('%%', '#_MARKER_#')]
-    substitutions.extend(test.config.substitutions)
-    substitutions.extend([('%s', sourcepath),
-                          ('%S', os.path.dirname(sourcepath)),
-                          ('%p', os.path.dirname(sourcepath)),
-                          ('%t', tmpBase + '.tmp'),
-                          # FIXME: Remove this once we kill DejaGNU.
-                          ('%abs_tmp', tmpBase + '.tmp'),
-                          ('#_MARKER_#', '%')])
-
-    # Collect the test lines from the script.
-    script = []
-    xfails = []
-    xtargets = []
-    for ln in open(sourcepath):
-        if 'RUN:' in ln:
-            # Isolate the command to run.
-            index = ln.index('RUN:')
-            ln = ln[index+4:]
-
-            # Trim trailing whitespace.
-            ln = ln.rstrip()
-
-            # Collapse lines with trailing '\\'.
-            if script and script[-1][-1] == '\\':
-                script[-1] = script[-1][:-1] + ln
-            else:
-                script.append(ln)
-        elif 'XFAIL:' in ln:
-            items = ln[ln.index('XFAIL:') + 6:].split(',')
-            xfails.extend([s.strip() for s in items])
-        elif 'XTARGET:' in ln:
-            items = ln[ln.index('XTARGET:') + 8:].split(',')
-            xtargets.extend([s.strip() for s in items])
-        elif 'END.' in ln:
-            # Check for END. lines.
-            if ln[ln.index('END.'):].strip() == 'END.':
-                break
-
-    # Apply substitutions to the script.
-    def processLine(ln):
-        # Apply substitutions
-        for a,b in substitutions:
-            ln = ln.replace(a,b)
-
-        # Strip the trailing newline and any extra whitespace.
-        return ln.strip()
-    script = map(processLine, script)
-
-    # Verify the script contains a run line.
-    if not script:
-        return (Test.UNRESOLVED, "Test has no run line!")
-
-    if script[-1][-1] == '\\':
-        return (Test.UNRESOLVED, "Test has unterminated run lines (with '\\')")
-
-    isXFail = isExpectedFail(xfails, xtargets, test.suite.config.target_triple)
-    return script,isXFail,tmpBase,execdir
-
-def formatTestOutput(status, out, err, exitCode, script):
-    output = StringIO.StringIO()
-    print >>output, "Script:"
-    print >>output, "--"
-    print >>output, '\n'.join(script)
-    print >>output, "--"
-    print >>output, "Exit Code: %r" % exitCode
-    print >>output, "Command Output (stdout):"
-    print >>output, "--"
-    output.write(out)
-    print >>output, "--"
-    print >>output, "Command Output (stderr):"
-    print >>output, "--"
-    output.write(err)
-    print >>output, "--"
-    return (status, output.getvalue())
-
-def executeTclTest(test, litConfig):
-    if test.config.unsupported:
-        return (Test.UNSUPPORTED, 'Test is unsupported')
-
-    res = parseIntegratedTestScript(test)
-    if len(res) == 2:
-        return res
-
-    script, isXFail, tmpBase, execdir = res
-
-    if litConfig.noExecute:
-        return (Test.PASS, '')
-
-    # Create the output directory if it does not already exist.
-    Util.mkdir_p(os.path.dirname(tmpBase))
-
-    res = executeTclScriptInternal(test, litConfig, tmpBase, script, execdir)
-    if len(res) == 2:
-        return res
-
-    out,err,exitCode = res
-    if isXFail:
-        ok = exitCode != 0
-        status = (Test.XPASS, Test.XFAIL)[ok]
-    else:
-        ok = exitCode == 0
-        status = (Test.FAIL, Test.PASS)[ok]
-
-    if ok:
-        return (status,'')
-
-    return formatTestOutput(status, out, err, exitCode, script)
-
-def executeShTest(test, litConfig, useExternalSh):
-    if test.config.unsupported:
-        return (Test.UNSUPPORTED, 'Test is unsupported')
-
-    res = parseIntegratedTestScript(test)
-    if len(res) == 2:
-        return res
-
-    script, isXFail, tmpBase, execdir = res
-
-    if litConfig.noExecute:
-        return (Test.PASS, '')
-
-    # Create the output directory if it does not already exist.
-    Util.mkdir_p(os.path.dirname(tmpBase))
-
-    if useExternalSh:
-        res = executeScript(test, litConfig, tmpBase, script, execdir)
-    else:
-        res = executeScriptInternal(test, litConfig, tmpBase, script, execdir)
-    if len(res) == 2:
-        return res
-
-    out,err,exitCode = res
-    if isXFail:
-        ok = exitCode != 0
-        status = (Test.XPASS, Test.XFAIL)[ok]
-    else:
-        ok = exitCode == 0
-        status = (Test.FAIL, Test.PASS)[ok]
-
-    if ok:
-        return (status,'')
-
-    return formatTestOutput(status, out, err, exitCode, script)
diff --git a/utils/lit/TestingConfig.py b/utils/lit/TestingConfig.py
deleted file mode 100644
index 1f5067c8e502..000000000000
--- a/utils/lit/TestingConfig.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import os
-
-class TestingConfig:
-    """"
-    TestingConfig - Information on the tests inside a suite.
-    """
-
-    @staticmethod
-    def frompath(path, parent, litConfig, mustExist, config = None):
-        if config is None:
-            # Set the environment based on the command line arguments.
-            environment = {
-                'PATH' : os.pathsep.join(litConfig.path +
-                                         [os.environ.get('PATH','')]),
-                'PATHEXT' : os.environ.get('PATHEXT',''),
-                'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
-                'LLVM_DISABLE_CRT_DEBUG' : '1',
-                }
-
-            config = TestingConfig(parent,
-                                   name = '<unnamed>',
-                                   suffixes = set(),
-                                   test_format = None,
-                                   environment = environment,
-                                   substitutions = [],
-                                   unsupported = False,
-                                   on_clone = None,
-                                   test_exec_root = None,
-                                   test_source_root = None,
-                                   excludes = [])
-
-        if os.path.exists(path):
-            # FIXME: Improve detection and error reporting of errors in the
-            # config file.
-            f = open(path)
-            cfg_globals = dict(globals())
-            cfg_globals['config'] = config
-            cfg_globals['lit'] = litConfig
-            cfg_globals['__file__'] = path
-            try:
-                exec f in cfg_globals
-            except SystemExit,status:
-                # We allow normal system exit inside a config file to just
-                # return control without error.
-                if status.args:
-                    raise
-            f.close()
-        elif mustExist:
-            litConfig.fatal('unable to load config from %r ' % path)
-
-        config.finish(litConfig)
-        return config
-
-    def __init__(self, parent, name, suffixes, test_format,
-                 environment, substitutions, unsupported, on_clone,
-                 test_exec_root, test_source_root, excludes):
-        self.parent = parent
-        self.name = str(name)
-        self.suffixes = set(suffixes)
-        self.test_format = test_format
-        self.environment = dict(environment)
-        self.substitutions = list(substitutions)
-        self.unsupported = unsupported
-        self.on_clone = on_clone
-        self.test_exec_root = test_exec_root
-        self.test_source_root = test_source_root
-        self.excludes = set(excludes)
-
-    def clone(self, path):
-        # FIXME: Chain implementations?
-        #
-        # FIXME: Allow extra parameters?
-        cfg = TestingConfig(self, self.name, self.suffixes, self.test_format,
-                            self.environment, self.substitutions,
-                            self.unsupported, self.on_clone,
-                            self.test_exec_root, self.test_source_root,
-                            self.excludes)
-        if cfg.on_clone:
-            cfg.on_clone(self, cfg, path)
-        return cfg
-
-    def finish(self, litConfig):
-        """finish() - Finish this config object, after loading is complete."""
-
-        self.name = str(self.name)
-        self.suffixes = set(self.suffixes)
-        self.environment = dict(self.environment)
-        self.substitutions = list(self.substitutions)
-        if self.test_exec_root is not None:
-            # FIXME: This should really only be suite in test suite config
-            # files. Should we distinguish them?
-            self.test_exec_root = str(self.test_exec_root)
-        if self.test_source_root is not None:
-            # FIXME: This should really only be suite in test suite config
-            # files. Should we distinguish them?
-            self.test_source_root = str(self.test_source_root)
-        self.excludes = set(self.excludes)
diff --git a/utils/lit/Util.py b/utils/lit/Util.py
deleted file mode 100644
index 66c5e46f690a..000000000000
--- a/utils/lit/Util.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import os, sys
-
-def detectCPUs():
-    """
-    Detects the number of CPUs on a system. Cribbed from pp.
-    """
-    # Linux, Unix and MacOS:
-    if hasattr(os, "sysconf"):
-        if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
-            # Linux & Unix:
-            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
-            if isinstance(ncpus, int) and ncpus > 0:
-                return ncpus
-        else: # OSX:
-            return int(os.popen2("sysctl -n hw.ncpu")[1].read())
-    # Windows:
-    if os.environ.has_key("NUMBER_OF_PROCESSORS"):
-        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
-        if ncpus > 0:
-            return ncpus
-    return 1 # Default
-
-def mkdir_p(path):
-    """mkdir_p(path) - Make the "path" directory, if it does not exist; this
-    will also make directories for any missing parent directories."""
-    import errno
-
-    if not path or os.path.exists(path):
-        return
-
-    parent = os.path.dirname(path) 
-    if parent != path:
-        mkdir_p(parent)
-
-    try:
-        os.mkdir(path)
-    except OSError,e:
-        # Ignore EEXIST, which may occur during a race condition.
-        if e.errno != errno.EEXIST:
-            raise
-
-def capture(args):
-    import subprocess
-    """capture(command) - Run the given command (or argv list) in a shell and
-    return the standard output."""
-    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    out,_ = p.communicate()
-    return out
-
-def which(command, paths = None):
-    """which(command, [paths]) - Look up the given command in the paths string
-    (or the PATH environment variable, if unspecified)."""
-
-    if paths is None:
-        paths = os.environ.get('PATH','')
-
-    # Check for absolute match first.
-    if os.path.exists(command):
-        return command
-
-    # Would be nice if Python had a lib function for this.
-    if not paths:
-        paths = os.defpath
-
-    # Get suffixes to search.
-    pathext = os.environ.get('PATHEXT', '').split(os.pathsep)
-
-    # Search the paths...
-    for path in paths.split(os.pathsep):
-        for ext in pathext:
-            p = os.path.join(path, command + ext)
-            if os.path.exists(p):
-                return p
-
-    return None
-
-def printHistogram(items, title = 'Items'):
-    import itertools, math
-
-    items.sort(key = lambda (_,v): v)
-
-    maxValue = max([v for _,v in items])
-
-    # Select first "nice" bar height that produces more than 10 bars.
-    power = int(math.ceil(math.log(maxValue, 10)))
-    for inc in itertools.cycle((5, 2, 2.5, 1)):
-        barH = inc * 10**power
-        N = int(math.ceil(maxValue / barH))
-        if N > 10:
-            break
-        elif inc == 1:
-            power -= 1
-
-    histo = [set() for i in range(N)]
-    for name,v in items:
-        bin = min(int(N * v/maxValue), N-1)
-        histo[bin].add(name)
-
-    barW = 40
-    hr = '-' * (barW + 34)
-    print '\nSlowest %s:' % title
-    print hr
-    for name,value in items[-20:]:
-        print '%.2fs: %s' % (value, name)
-    print '\n%s Times:' % title
-    print hr
-    pDigits = int(math.ceil(math.log(maxValue, 10)))
-    pfDigits = max(0, 3-pDigits)
-    if pfDigits:
-        pDigits += pfDigits + 1
-    cDigits = int(math.ceil(math.log(len(items), 10)))
-    print "[%s] :: [%s] :: [%s]" % ('Range'.center((pDigits+1)*2 + 3),
-                                    'Percentage'.center(barW),
-                                    'Count'.center(cDigits*2 + 1))
-    print hr
-    for i,row in enumerate(histo):
-        pct = float(len(row)) / len(items)
-        w = int(barW * pct)
-        print "[%*.*fs,%*.*fs)" % (pDigits, pfDigits, i*barH,
-                                   pDigits, pfDigits, (i+1)*barH),
-        print ":: [%s%s] :: [%*d/%*d]" % ('*'*w, ' '*(barW-w),
-                                          cDigits, len(row),
-                                          cDigits, len(items))
-
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 20fbc6c13a9f..a7de2b79f8f3 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -353,6 +353,8 @@ def isExpectedFail(xfails, xtargets, target_triple):
 
     return True
 
+import re
+
 def parseIntegratedTestScript(test):
     """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
     script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
@@ -385,7 +387,21 @@ def parseIntegratedTestScript(test):
     script = []
     xfails = []
     xtargets = []
+    ignoredAny = False
     for ln in open(sourcepath):
+        conditional = re.search(r'IF\((.+?)\((.+?)\)\):', ln)
+        if conditional:
+            # Matched 'IF(<condition>(<value>)):'; keep only the text after it.
+            ln = ln[conditional.end():]
+            condition, value = conditional.groups()
+
+            # An unknown condition is an error; a false one drops this line.
+            if condition not in test.config.conditions:
+                return (Test.UNRESOLVED, "unknown condition '"+condition+"'")
+            if not test.config.conditions[condition](value):
+                ignoredAny = True
+                continue
+
         if 'RUN:' in ln:
             # Isolate the command to run.
             index = ln.index('RUN:')
@@ -422,6 +438,8 @@ def parseIntegratedTestScript(test):
 
     # Verify the script contains a run line.
     if not script:
+        if ignoredAny:
+            return (Test.UNSUPPORTED, "Test has only ignored run lines")
         return (Test.UNRESOLVED, "Test has no run line!")
 
     if script[-1][-1] == '\\':
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 1f5067c8e502..d6f2a4dc7111 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -10,6 +10,7 @@ class TestingConfig:
         if config is None:
             # Set the environment based on the command line arguments.
             environment = {
+                'LD_LIBRARY_PATH' : os.environ.get('LD_LIBRARY_PATH',''),
                 'PATH' : os.pathsep.join(litConfig.path +
                                          [os.environ.get('PATH','')]),
                 'PATHEXT' : os.environ.get('PATHEXT',''),
@@ -27,7 +28,8 @@ class TestingConfig:
                                    on_clone = None,
                                    test_exec_root = None,
                                    test_source_root = None,
-                                   excludes = [])
+                                   excludes = [],
+                                   conditions = {})
 
         if os.path.exists(path):
             # FIXME: Improve detection and error reporting of errors in the
@@ -53,7 +55,7 @@ class TestingConfig:
 
     def __init__(self, parent, name, suffixes, test_format,
                  environment, substitutions, unsupported, on_clone,
-                 test_exec_root, test_source_root, excludes):
+                 test_exec_root, test_source_root, excludes, conditions):
         self.parent = parent
         self.name = str(name)
         self.suffixes = set(suffixes)
@@ -65,6 +67,7 @@ class TestingConfig:
         self.test_exec_root = test_exec_root
         self.test_source_root = test_source_root
         self.excludes = set(excludes)
+        self.conditions = dict(conditions)
 
     def clone(self, path):
         # FIXME: Chain implementations?
@@ -74,7 +77,7 @@ class TestingConfig:
                             self.environment, self.substitutions,
                             self.unsupported, self.on_clone,
                             self.test_exec_root, self.test_source_root,
-                            self.excludes)
+                            self.excludes, self.conditions)
         if cfg.on_clone:
             cfg.on_clone(self, cfg, path)
         return cfg
diff --git a/utils/unittest/googletest/tempfile.tmp b/utils/unittest/googletest/tempfile.tmp
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/utils/unittest/googletest/tempfile.tmp
+++ /dev/null