Diffstat (limited to 'include/llvm/Transforms')
21 files changed, 1055 insertions, 150 deletions
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index ce1a7d6a5230a..fbd999cbc9469 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -199,6 +199,10 @@ ModulePass *createMetaRenamerPass();
 /// manager.
 ModulePass *createBarrierNoopPass();
 
+/// \brief This pass lowers bitset metadata and the llvm.bitset.test intrinsic
+/// to bitsets.
+ModulePass *createLowerBitSetsPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/IPO/LowerBitSets.h b/include/llvm/Transforms/IPO/LowerBitSets.h
new file mode 100644
index 0000000000000..55d7d84560a0c
--- /dev/null
+++ b/include/llvm/Transforms/IPO/LowerBitSets.h
@@ -0,0 +1,198 @@
+//===- LowerBitSets.h - Bitset lowering pass --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines parts of the bitset lowering pass implementation that may
+// be usefully unit tested.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_LOWERBITSETS_H
+#define LLVM_TRANSFORMS_IPO_LOWERBITSETS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include <stdint.h>
+#include <limits>
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+class DataLayout;
+class GlobalVariable;
+class Value;
+
+struct BitSetInfo {
+  // The indices of the set bits in the bitset.
+  std::set<uint64_t> Bits;
+
+  // The byte offset into the combined global represented by the bitset.
+  uint64_t ByteOffset;
+
+  // The size of the bitset in bits.
+  uint64_t BitSize;
+
+  // Log2 alignment of the bit set relative to the combined global.
+  // For example, a log2 alignment of 3 means that bits in the bitset
+  // represent addresses 8 bytes apart.
+  unsigned AlignLog2;
+
+  bool isSingleOffset() const {
+    return Bits.size() == 1;
+  }
+
+  bool isAllOnes() const {
+    return Bits.size() == BitSize;
+  }
+
+  bool containsGlobalOffset(uint64_t Offset) const;
+
+  bool containsValue(const DataLayout &DL,
+                     const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
+                     Value *V, uint64_t COffset = 0) const;
+};
+
+struct BitSetBuilder {
+  SmallVector<uint64_t, 16> Offsets;
+  uint64_t Min, Max;
+
+  BitSetBuilder() : Min(std::numeric_limits<uint64_t>::max()), Max(0) {}
+
+  void addOffset(uint64_t Offset) {
+    if (Min > Offset)
+      Min = Offset;
+    if (Max < Offset)
+      Max = Offset;
+
+    Offsets.push_back(Offset);
+  }
+
+  BitSetInfo build();
+};
+
+/// This class implements a layout algorithm for globals referenced by bit sets
+/// that tries to keep members of small bit sets together. This can
+/// significantly reduce bit set sizes in many cases.
+///
+/// It works by assembling fragments of layout from sets of referenced globals.
+/// Each set of referenced globals causes the algorithm to create a new
+/// fragment, which is assembled by appending each referenced global in the set
+/// into the fragment. If a referenced global has already been referenced by a
+/// fragment created earlier, we instead delete that fragment and append its
+/// contents into the fragment we are assembling.
+///
+/// By starting with the smallest fragments, we minimize the size of the
+/// fragments that are copied into larger fragments. This is most intuitively
+/// thought about when considering the case where the globals are virtual tables
+/// and the bit sets represent their derived classes: in a single inheritance
+/// hierarchy, the optimum layout would involve a depth-first search of the
+/// class hierarchy (and in fact the computed layout ends up looking a lot like
+/// a DFS), but a naive DFS would not work well in the presence of multiple
+/// inheritance. This aspect of the algorithm ends up fitting smaller
+/// hierarchies inside larger ones where that would be beneficial.
+///
+/// For example, consider this class hierarchy:
+///
+///  A     B
+///    \  / | \
+///     C   D   E
+///
+/// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
+/// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
+/// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
+/// cover the only 4 objects in its hierarchy, but not for bsA as it needs to
+/// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
+///
+/// Add bsC, fragments {{C}}
+/// Add bsD, fragments {{C}, {D}}
+/// Add bsE, fragments {{C}, {D}, {E}}
+/// Add bsA, fragments {{A, C}, {D}, {E}}
+/// Add bsB, fragments {{B, A, C, D, E}}
+///
+/// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
+/// fewer) objects, at the cost of bsB needing to cover 1 more object.
+///
+/// The bit set lowering pass assigns an object index to each object that needs
+/// to be laid out, and calls addFragment for each bit set passing the object
+/// indices of its referenced globals. It then assembles a layout from the
+/// computed layout in the Fragments field.
+struct GlobalLayoutBuilder {
+  /// The computed layout. Each element of this vector contains a fragment of
+  /// layout (which may be empty) consisting of object indices.
+  std::vector<std::vector<uint64_t>> Fragments;
+
+  /// Mapping from object index to fragment index.
+  std::vector<uint64_t> FragmentMap;
+
+  GlobalLayoutBuilder(uint64_t NumObjects)
+      : Fragments(1), FragmentMap(NumObjects) {}
+
+  /// Add F to the layout while trying to keep its indices contiguous.
+  /// If a previously seen fragment uses any of F's indices, that
+  /// fragment will be laid out inside F.
+  void addFragment(const std::set<uint64_t> &F);
+};
+
+/// This class is used to build a byte array containing overlapping bit sets. By
+/// loading from indexed offsets into the byte array and applying a mask, a
+/// program can test bits from the bit set with a relatively short instruction
+/// sequence. For example, suppose we have 15 bit sets to lay out:
+///
+/// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
+/// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
+/// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
+///
+/// These bits can be laid out in a 16-byte array like this:
+///
+///       Byte Offset
+///     0123456789ABCDEF
+/// Bit
+///   7 HHHHHHHHHIIIIIII
+///   6 GGGGGGGGGGJJJJJJ
+///   5 FFFFFFFFFFFKKKKK
+///   4 EEEEEEEEEEEELLLL
+///   3 DDDDDDDDDDDDDMMM
+///   2 CCCCCCCCCCCCCCNN
+///   1 BBBBBBBBBBBBBBBO
+///   0 AAAAAAAAAAAAAAAA
+///
+/// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
+/// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
+/// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
+///
+/// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
+/// because for one thing it gives us better packing (the more bins there are,
+/// the less evenly they will be filled), and for another, the instruction
+/// sequences can be slightly shorter, both on x86 and ARM.
+struct ByteArrayBuilder {
+  /// The byte array built so far.
+  std::vector<uint8_t> Bytes;
+
+  enum { BitsPerByte = 8 };
+
+  /// The number of bytes allocated so far for each of the bits.
+  uint64_t BitAllocs[BitsPerByte];
+
+  ByteArrayBuilder() {
+    memset(BitAllocs, 0, sizeof(BitAllocs));
+  }
+
+  /// Allocate BitSize bits in the byte array where Bits contains the bits to
+  /// set. AllocByteOffset is set to the offset within the byte array and
+  /// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
+  /// Processing Time) multiprocessor scheduling algorithm to lay out the bits
+  /// efficiently; the pass allocates bit sets in decreasing size order.
+  void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
+                uint64_t &AllocByteOffset, uint8_t &AllocMask);
+};
+
+} // namespace llvm
+
+#endif
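To illustrate how the three builders above compose, here is a minimal sketch (not part of the patch; the offsets and object count are arbitrary), using only the declarations shown in this header:

    #include "llvm/Transforms/IPO/LowerBitSets.h"
    using namespace llvm;

    void buildExampleBitSet() {
      // Gather the byte offsets of the set's members within the combined
      // global, then summarize them as a compact BitSetInfo.
      BitSetBuilder BSB;
      BSB.addOffset(0);
      BSB.addOffset(8);
      BSB.addOffset(16);
      BitSetInfo BSI = BSB.build();

      // Lay out four objects, keeping this set's members (object indices
      // 0 and 2) contiguous in the final layout.
      GlobalLayoutBuilder GLB(4);
      GLB.addFragment(std::set<uint64_t>{0, 2});

      // Pack the computed bits into the shared byte array; AllocByteOffset
      // and AllocMask report where the bits landed.
      ByteArrayBuilder BAB;
      uint64_t AllocByteOffset;
      uint8_t AllocMask;
      BAB.allocate(BSI.Bits, BSI.BitSize, AllocByteOffset, AllocMask);
    }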
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index b1426b4f45ca1..5d574ae0bf0f2 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -19,7 +19,7 @@
 namespace llvm {
 class Pass;
-class TargetLibraryInfo;
+class TargetLibraryInfoImpl;
 class TargetMachine;
 
 // The old pass manager infrastructure is hidden in a legacy namespace now.
@@ -27,8 +27,6 @@ namespace legacy {
 class FunctionPassManager;
 class PassManagerBase;
 }
-using legacy::FunctionPassManager;
-using legacy::PassManagerBase;
 
 /// PassManagerBuilder - This class is used to set up a standard optimization
 /// sequence for languages like C and C++, allowing some APIs to customize the
@@ -59,7 +57,7 @@ public:
   /// Extensions are passed the builder itself (so they can see how it is
   /// configured) as well as the pass manager to add stuff to.
   typedef void (*ExtensionFn)(const PassManagerBuilder &Builder,
-                              PassManagerBase &PM);
+                              legacy::PassManagerBase &PM);
   enum ExtensionPointTy {
     /// EP_EarlyAsPossible - This extension point allows adding passes before
     /// any other transformations, allowing them to see the code as it is coming
@@ -105,7 +103,7 @@ public:
   /// LibraryInfo - Specifies information about the runtime library for the
   /// optimizer. If this is non-null, it is added to both the function and
   /// per-module pass pipeline.
-  TargetLibraryInfo *LibraryInfo;
+  TargetLibraryInfoImpl *LibraryInfo;
 
   /// Inliner - Specifies the inliner to use. If this is non-null, it is
   /// added to the per-module passes.
@@ -122,7 +120,6 @@ public:
   bool DisableGVNLoadPRE;
   bool VerifyInput;
   bool VerifyOutput;
-  bool StripDebug;
   bool MergeFunctions;
 
 private:
@@ -139,19 +136,21 @@ public:
   void addExtension(ExtensionPointTy Ty, ExtensionFn Fn);
 
 private:
-  void addExtensionsToPM(ExtensionPointTy ETy, PassManagerBase &PM) const;
-  void addInitialAliasAnalysisPasses(PassManagerBase &PM) const;
-  void addLTOOptimizationPasses(PassManagerBase &PM);
+  void addExtensionsToPM(ExtensionPointTy ETy,
+                         legacy::PassManagerBase &PM) const;
+  void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
+  void addLTOOptimizationPasses(legacy::PassManagerBase &PM);
+  void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
 
 public:
   /// populateFunctionPassManager - This fills in the function pass manager,
   /// which is expected to be run on each function immediately as it is
   /// generated. The idea is to reduce the size of the IR in memory.
-  void populateFunctionPassManager(FunctionPassManager &FPM);
+  void populateFunctionPassManager(legacy::FunctionPassManager &FPM);
 
   /// populateModulePassManager - This sets up the primary pass manager.
-  void populateModulePassManager(PassManagerBase &MPM);
-  void populateLTOPassManager(PassManagerBase &PM, TargetMachine *TM = nullptr);
+  void populateModulePassManager(legacy::PassManagerBase &MPM);
+  void populateLTOPassManager(legacy::PassManagerBase &PM);
 };
 
 /// Registers a function for adding a standard set of passes. This should be
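Since the using-declarations are gone, driver code must now name the legacy pass managers explicitly. A minimal sketch of the updated usage (illustrative only, not part of the patch):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    void buildPipeline(Module &M) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2; // Roughly -O2.

      legacy::FunctionPassManager FPM(&M); // Was plain FunctionPassManager.
      legacy::PassManager MPM;             // Derives from legacy::PassManagerBase.
      PMB.populateFunctionPassManager(FPM);
      PMB.populateModulePassManager(MPM);
    }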
diff --git a/include/llvm/Transforms/InstCombine/InstCombine.h b/include/llvm/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 0000000000000..f48ec13107bc9
--- /dev/null
+++ b/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,46 @@
+//===- InstCombine.h - InstCombine pass -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the primary interface to the instcombine pass. This pass
+/// is suitable for use in the new pass manager. For a pass that works with the
+/// legacy pass manager, please look for \c createInstructionCombiningPass() in
+/// Scalar.h.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+
+namespace llvm {
+
+class InstCombinePass {
+  InstCombineWorklist Worklist;
+
+public:
+  static StringRef name() { return "InstCombinePass"; }
+
+  // Explicitly define constructors for MSVC.
+  InstCombinePass() {}
+  InstCombinePass(InstCombinePass &&Arg) : Worklist(std::move(Arg.Worklist)) {}
+  InstCombinePass &operator=(InstCombinePass &&RHS) {
+    Worklist = std::move(RHS.Worklist);
+    return *this;
+  }
+
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 0000000000000..a6bad343db43b
--- /dev/null
+++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,116 @@
+//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
+#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "instcombine"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class InstCombineWorklist {
+  SmallVector<Instruction*, 256> Worklist;
+  DenseMap<Instruction*, unsigned> WorklistMap;
+
+  void operator=(const InstCombineWorklist&RHS) = delete;
+  InstCombineWorklist(const InstCombineWorklist&) = delete;
+public:
+  InstCombineWorklist() {}
+
+  InstCombineWorklist(InstCombineWorklist &&Arg)
+      : Worklist(std::move(Arg.Worklist)),
+        WorklistMap(std::move(Arg.WorklistMap)) {}
+  InstCombineWorklist &operator=(InstCombineWorklist &&RHS) {
+    Worklist = std::move(RHS.Worklist);
+    WorklistMap = std::move(RHS.WorklistMap);
+    return *this;
+  }
+
+  bool isEmpty() const { return Worklist.empty(); }
+
+  /// Add - Add the specified instruction to the worklist if it isn't already
+  /// in it.
+  void Add(Instruction *I) {
+    if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+      DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
+      Worklist.push_back(I);
+    }
+  }
+
+  void AddValue(Value *V) {
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      Add(I);
+  }
+
+  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
+  /// which should only be done when the worklist is empty and when the group
+  /// has no duplicates.
+  void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+    assert(Worklist.empty() && "Worklist must be empty to add initial group");
+    Worklist.reserve(NumEntries+16);
+    WorklistMap.resize(NumEntries);
+    DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+    for (unsigned Idx = 0; NumEntries; --NumEntries) {
+      Instruction *I = List[NumEntries-1];
+      WorklistMap.insert(std::make_pair(I, Idx++));
+      Worklist.push_back(I);
+    }
+  }
+
+  // Remove - remove I from the worklist if it exists.
+  void Remove(Instruction *I) {
+    DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+    if (It == WorklistMap.end()) return; // Not in worklist.
+
+    // Don't bother moving everything down, just null out the slot.
+    Worklist[It->second] = nullptr;
+
+    WorklistMap.erase(It);
+  }
+
+  Instruction *RemoveOne() {
+    Instruction *I = Worklist.pop_back_val();
+    WorklistMap.erase(I);
+    return I;
+  }
+
+  /// AddUsersToWorkList - When an instruction is simplified, add all users of
+  /// the instruction to the work lists because they might get more simplified
+  /// now.
+  ///
+  void AddUsersToWorkList(Instruction &I) {
+    for (User *U : I.users())
+      Add(cast<Instruction>(U));
+  }
+
+
+  /// Zap - check that the worklist is empty and nuke the backing store for
+  /// the map if it is large.
+  void Zap() {
+    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+    // Do an explicit clear, this shrinks the map if needed.
+    WorklistMap.clear();
+  }
+};
+
+} // end namespace llvm.
+
+#undef DEBUG_TYPE
+
+#endif
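The worklist's contract is easiest to see from the caller's side. Below is a sketch of the fixpoint loop a consumer of this class runs (illustrative only; InstCombine's real driver lives in the implementation files):

    #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
    using namespace llvm;

    static void drainWorklist(InstCombineWorklist &WL) {
      while (!WL.isEmpty()) {
        Instruction *I = WL.RemoveOne();
        if (!I) // Remove() nulls out slots instead of compacting the vector.
          continue;
        // ... simplify I here; if anything changed, revisit its users:
        WL.AddUsersToWorkList(*I);
      }
    }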
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 24e3ef783ee62..884f54fd3737c 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -15,6 +15,7 @@
 #define LLVM_TRANSFORMS_INSTRUMENTATION_H
 
 #include "llvm/ADT/StringRef.h"
+#include <vector>
 
 #if defined(__GNUC__) && defined(__linux__) && !defined(ANDROID)
 inline void *getDFSanArgTLSPtrForJIT() {
@@ -59,6 +60,10 @@ struct GCOVOptions {
   // Emit the name of the function in the .gcda files. This is redundant, as
   // the function identifier can be used to find the name from the .gcno file.
   bool FunctionNamesInData;
+
+  // Emit the exit block immediately after the start block, rather than after
+  // all of the function body's blocks.
+  bool ExitBlockBeforeBody;
 };
 
 ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
                                    GCOVOptions::getDefault());
@@ -69,6 +74,9 @@ struct InstrProfOptions {
   // Add the 'noredzone' attribute to added runtime library calls.
   bool NoRedZone;
+
+  // Name of the profile file to use as output.
+  std::string InstrProfileOutput;
 };
 
 /// Insert frontend instrumentation based profiling.
@@ -86,17 +94,36 @@ FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0);
 FunctionPass *createThreadSanitizerPass();
 
 // Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
-ModulePass *createDataFlowSanitizerPass(StringRef ABIListFile = StringRef(),
-                                        void *(*getArgTLS)() = nullptr,
-                                        void *(*getRetValTLS)() = nullptr);
+ModulePass *createDataFlowSanitizerPass(
+    const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
+    void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
+
+// Options for sanitizer coverage instrumentation.
+struct SanitizerCoverageOptions {
+  SanitizerCoverageOptions()
+      : CoverageType(SCK_None), IndirectCalls(false), TraceBB(false),
+        TraceCmp(false), Use8bitCounters(false) {}
+
+  enum Type {
+    SCK_None = 0,
+    SCK_Function,
+    SCK_BB,
+    SCK_Edge
+  } CoverageType;
+  bool IndirectCalls;
+  bool TraceBB;
+  bool TraceCmp;
+  bool Use8bitCounters;
+};
 
 // Insert SanitizerCoverage instrumentation.
-ModulePass *createSanitizerCoverageModulePass(int CoverageLevel);
+ModulePass *createSanitizerCoverageModulePass(
+    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
 
 #if defined(__GNUC__) && defined(__linux__) && !defined(ANDROID)
-inline ModulePass *createDataFlowSanitizerPassForJIT(StringRef ABIListFile =
-                                                     StringRef()) {
-  return createDataFlowSanitizerPass(ABIListFile, getDFSanArgTLSPtrForJIT,
+inline ModulePass *createDataFlowSanitizerPassForJIT(
+    const std::vector<std::string> &ABIListFiles = std::vector<std::string>()) {
+  return createDataFlowSanitizerPass(ABIListFiles, getDFSanArgTLSPtrForJIT,
                                      getDFSanRetValTLSPtrForJIT);
 }
 #endif
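Callers migrate from the old integer coverage level to the options struct. A sketch (not part of the patch):

    #include "llvm/Transforms/Instrumentation.h"
    using namespace llvm;

    ModulePass *makeEdgeCoveragePass() {
      SanitizerCoverageOptions Opts;
      Opts.CoverageType = SanitizerCoverageOptions::SCK_Edge;
      Opts.Use8bitCounters = true; // Optional extras are now explicit flags.
      return createSanitizerCoverageModulePass(Opts);
    }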
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 5dcd899487598..c4669f121e6ac 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -21,6 +21,7 @@ namespace llvm {
 
 class BasicBlockPass;
 class FunctionPass;
+class ModulePass;
 class Pass;
 class GetElementPtrInst;
 class PassInfo;
@@ -81,6 +82,13 @@ FunctionPass *createAggressiveDCEPass();
 
 //===----------------------------------------------------------------------===//
 //
+// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
+// remove computations of dead bits.
+//
+FunctionPass *createBitTrackingDCEPass();
+
+//===----------------------------------------------------------------------===//
+//
 // SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
 //
 FunctionPass *createSROAPass(bool RequiresDomTree = true);
@@ -98,6 +106,13 @@ FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
 
 //===----------------------------------------------------------------------===//
 //
+// InductiveRangeCheckElimination - Transform loops to elide range checks on
+// linear functions of the induction variable.
+//
+Pass *createInductiveRangeCheckEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
 // InductionVariableSimplify - Transform induction variables in a program to all
 // use a single canonical induction variable per loop.
 //
@@ -125,12 +140,19 @@ Pass *createLICMPass();
 
 //===----------------------------------------------------------------------===//
 //
+// LoopInterchange - This pass interchanges loops to provide more
+// cache-friendly memory access patterns.
+//
+Pass *createLoopInterchangePass();
+
+//===----------------------------------------------------------------------===//
+//
 // LoopStrengthReduce - This pass strength-reduces GEP instructions that use
 // a loop's canonical induction variable as one of their indices.
 //
 Pass *createLoopStrengthReducePass();
 
-Pass *createGlobalMergePass(const TargetMachine *TM = nullptr);
+Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset);
 
 //===----------------------------------------------------------------------===//
 //
@@ -401,10 +423,59 @@ createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr,
 
 //===----------------------------------------------------------------------===//
 //
+// SpeculativeExecution - Aggressively hoist instructions to enable
+// speculative execution on targets where branches are expensive.
+//
+FunctionPass *createSpeculativeExecutionPass();
+
+//===----------------------------------------------------------------------===//
+//
 // LoadCombine - Combine loads into bigger loads.
 //
 BasicBlockPass *createLoadCombinePass();
 
+//===----------------------------------------------------------------------===//
+//
+// StraightLineStrengthReduce - This pass strength-reduces certain
+// instruction patterns in straight-line code.
+//
+FunctionPass *createStraightLineStrengthReducePass();
+
+//===----------------------------------------------------------------------===//
+//
+// PlaceSafepoints - Rewrite any IR calls to gc.statepoints and insert any
+// safepoint polls (method entry, backedge) that might be required. This pass
+// does not generate explicit relocation sequences - that's handled by
+// RewriteStatepointsForGC which can be run at an arbitrary point in the pass
+// order following this pass.
+//
+FunctionPass *createPlaceSafepointsPass();
+
+//===----------------------------------------------------------------------===//
+//
+// RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have
+// explicit relocations to include explicit relocations.
+//
+FunctionPass *createRewriteStatepointsForGCPass();
+
+//===----------------------------------------------------------------------===//
+//
+// Float2Int - Demote floats to ints where possible.
+//
+FunctionPass *createFloat2IntPass();
+
+//===----------------------------------------------------------------------===//
+//
+// NaryReassociate - Simplify n-ary operations by reassociation.
+//
+FunctionPass *createNaryReassociatePass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopDistribute - Distribute loops.
+//
+FunctionPass *createLoopDistributePass();
+
 } // End llvm namespace
 
 #endif
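The new factory functions slot into a legacy pipeline in the usual way; a brief sketch (illustrative only):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    void addNewScalarPasses(legacy::PassManagerBase &PM) {
      PM.add(createBitTrackingDCEPass());             // Dead-bit elimination.
      PM.add(createSpeculativeExecutionPass());       // Hoisting for branchy targets.
      PM.add(createStraightLineStrengthReducePass());
      PM.add(createNaryReassociatePass());
    }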
diff --git a/include/llvm/Transforms/Scalar/EarlyCSE.h b/include/llvm/Transforms/Scalar/EarlyCSE.h
new file mode 100644
index 0000000000000..e3dd3c050df61
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -0,0 +1,39 @@
+//===- EarlyCSE.h - Simple and fast CSE pass --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for a simple, fast CSE pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_EARLYCSE_H
+#define LLVM_TRANSFORMS_SCALAR_EARLYCSE_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// \brief A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSEPass {
+public:
+  static StringRef name() { return "EarlyCSEPass"; }
+
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
new file mode 100644
index 0000000000000..40283203f3a38
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -0,0 +1,40 @@
+//===- LowerExpectIntrinsic.h - LowerExpectIntrinsic pass -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The header file for the LowerExpectIntrinsic pass as used by the new pass
+/// manager.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LowerExpectIntrinsicPass {
+public:
+  static StringRef name() { return "LowerExpectIntrinsicPass"; }
+
+  /// \brief Run the pass over the function.
+  ///
+  /// This will lower all of the expect intrinsic calls in this function into
+  /// branch weight metadata. That metadata will subsequently feed the analysis
+  /// of the probabilities and frequencies of the CFG. After running this pass,
+  /// no more expect intrinsics remain, allowing the rest of the optimizer to
+  /// ignore them.
+  PreservedAnalyses run(Function &F);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h
new file mode 100644
index 0000000000000..ef28e0f78a4c1
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -0,0 +1,46 @@
+//===- SimplifyCFG.h - Simplify and canonicalize the CFG --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for the pass responsible for both
+/// simplifying and canonicalizing the CFG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
+#define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// \brief A pass to simplify and canonicalize the CFG of a function.
+///
+/// This pass iteratively simplifies the entire CFG of a function, removing
+/// unnecessary control flows and bringing it into the canonical form expected
+/// by the rest of the mid-level optimizer.
+class SimplifyCFGPass {
+  int BonusInstThreshold;
+
+public:
+  static StringRef name() { return "SimplifyCFGPass"; }
+
+  /// \brief Construct a pass with the default thresholds.
+  SimplifyCFGPass();
+
+  /// \brief Construct a pass with a specific bonus threshold.
+  SimplifyCFGPass(int BonusInstThreshold);
+
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
+};
+
+}
+
+#endif
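These classes follow the early new-pass-manager convention: a static name() plus a run() taking the function and, for passes that need analyses, a pointer to the analysis manager. A sketch of running them by hand, assuming the 3.7-era AnalysisManager<Function> and PreservedAnalyses::intersect (illustrative only):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/EarlyCSE.h"
    #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
    #include "llvm/Transforms/Scalar/SimplifyCFG.h"
    using namespace llvm;

    PreservedAnalyses runCleanups(Function &F, AnalysisManager<Function> &AM) {
      PreservedAnalyses PA = LowerExpectIntrinsicPass().run(F);
      PA.intersect(EarlyCSEPass().run(F, &AM));
      PA.intersect(SimplifyCFGPass(/*BonusInstThreshold=*/1).run(F, &AM));
      return PA;
    }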
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 19acf5b2db837..710db03c45d69 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -23,10 +23,11 @@
 namespace llvm {
 
 class AliasAnalysis;
+class MemoryDependenceAnalysis;
 class DominatorTree;
+class LoopInfo;
 class Instruction;
 class MDNode;
-class Pass;
 class ReturnInst;
 class TargetLibraryInfo;
 class TerminatorInst;
@@ -39,7 +40,8 @@ void DeleteDeadBlock(BasicBlock *BB);
 /// any single-entry PHI nodes in it, fold them away. This handles the case
 /// when all entries to the PHI nodes in a block are guaranteed equal, such as
 /// when the block has exactly one predecessor.
-void FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P = nullptr);
+void FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA = nullptr,
+                             MemoryDependenceAnalysis *MemDep = nullptr);
 
 /// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
 /// is dead. Also recursively delete any operands that become dead as
@@ -50,7 +52,10 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr);
 
 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
 /// if possible. The return value indicates success or failure.
-bool MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P = nullptr);
+bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr,
+                               LoopInfo *LI = nullptr,
+                               AliasAnalysis *AA = nullptr,
+                               MemoryDependenceAnalysis *MemDep = nullptr);
 
 // ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
 // with a value, then remove and delete the original instruction.
@@ -70,18 +75,62 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
 //
 void ReplaceInstWithInst(Instruction *From, Instruction *To);
 
+/// \brief Option class for critical edge splitting.
+///
+/// This provides a builder interface for overriding the default options used
+/// during critical edge splitting.
+struct CriticalEdgeSplittingOptions {
+  AliasAnalysis *AA;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  bool MergeIdenticalEdges;
+  bool DontDeleteUselessPHIs;
+  bool PreserveLCSSA;
+
+  CriticalEdgeSplittingOptions()
+      : AA(nullptr), DT(nullptr), LI(nullptr), MergeIdenticalEdges(false),
+        DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
+
+  /// \brief Basic case of setting up all the analyses.
+  CriticalEdgeSplittingOptions(AliasAnalysis *AA, DominatorTree *DT = nullptr,
+                               LoopInfo *LI = nullptr)
+      : AA(AA), DT(DT), LI(LI), MergeIdenticalEdges(false),
+        DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
+
+  /// \brief A common pattern is to preserve the dominator tree and loop
+  /// info but not care about AA.
+  CriticalEdgeSplittingOptions(DominatorTree *DT, LoopInfo *LI)
+      : AA(nullptr), DT(DT), LI(LI), MergeIdenticalEdges(false),
+        DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
+
+  CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
+    MergeIdenticalEdges = true;
+    return *this;
+  }
+
+  CriticalEdgeSplittingOptions &setDontDeleteUselessPHIs() {
+    DontDeleteUselessPHIs = true;
+    return *this;
+  }
+
+  CriticalEdgeSplittingOptions &setPreserveLCSSA() {
+    PreserveLCSSA = true;
+    return *this;
+  }
+};
+
 /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge. This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns the new block if the edge
-/// was split, null otherwise.
+/// split the critical edge. This will update the analyses passed in through
+/// the option struct. This returns the new block if the edge was split, null
+/// otherwise.
 ///
-/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
-/// specified successor will be merged into the same critical edge block.
-/// This is most commonly interesting with switch instructions, which may
-/// have many edges to any one destination. This ensures that all edges to that
-/// dest go to one block instead of each going to a different block, but isn't
-/// the standard definition of a "critical edge".
+/// If MergeIdenticalEdges in the options struct is true (not the default),
+/// *all* edges from TI to the specified successor will be merged into the same
+/// critical edge block. This is most commonly interesting with switch
+/// instructions, which may have many edges to any one destination. This
+/// ensures that all edges to that dest go to one block instead of each going
+/// to a different block, but isn't the standard definition of a "critical
+/// edge".
 ///
 /// It is invalid to call this function on a critical edge that starts at an
 /// IndirectBrInst. Splitting these edges will almost always create an invalid
@@ -89,14 +138,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
 /// to.
 ///
 BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
-                              Pass *P = nullptr,
-                              bool MergeIdenticalEdges = false,
-                              bool DontDeleteUselessPHIs = false,
-                              bool SplitLandingPads = false);
-
-inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
-                                     Pass *P = nullptr) {
-  return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), P);
+                              const CriticalEdgeSplittingOptions &Options =
+                                  CriticalEdgeSplittingOptions());
+
+inline BasicBlock *
+SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
+                  const CriticalEdgeSplittingOptions &Options =
+                      CriticalEdgeSplittingOptions()) {
+  return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(),
+                           Options);
 }
 
 /// SplitCriticalEdge - If the edge from *PI to BB is not critical, return
@@ -105,55 +155,62 @@ inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
 /// function. If P is specified, it updates the analyses
 /// described above.
 inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI,
-                              Pass *P = nullptr) {
+                              const CriticalEdgeSplittingOptions &Options =
+                                  CriticalEdgeSplittingOptions()) {
   bool MadeChange = false;
   TerminatorInst *TI = (*PI)->getTerminator();
   for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
     if (TI->getSuccessor(i) == Succ)
-      MadeChange |= !!SplitCriticalEdge(TI, i, P);
+      MadeChange |= !!SplitCriticalEdge(TI, i, Options);
   return MadeChange;
 }
 
 /// SplitCriticalEdge - If an edge from Src to Dst is critical, split the edge
 /// and return true, otherwise return false. This method requires that there be
-/// an edge between the two blocks.
+/// an edge between the two blocks. It updates the analyses
+/// passed in through the options struct.
+inline BasicBlock *
+SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
+                  const CriticalEdgeSplittingOptions &Options =
+                      CriticalEdgeSplittingOptions()) {
   TerminatorInst *TI = Src->getTerminator();
   unsigned i = 0;
   while (1) {
     assert(i != TI->getNumSuccessors() && "Edge doesn't exist!");
     if (TI->getSuccessor(i) == Dst)
-      return SplitCriticalEdge(TI, i, P, MergeIdenticalEdges,
-                               DontDeleteUselessPHIs);
+      return SplitCriticalEdge(TI, i, Options);
     ++i;
   }
 }
 
 // SplitAllCriticalEdges - Loop over all of the edges in the CFG,
-// breaking critical edges as they are found. Pass P must not be NULL.
+// breaking critical edges as they are found.
 // Returns the number of broken edges.
-unsigned SplitAllCriticalEdges(Function &F, Pass *P);
+unsigned SplitAllCriticalEdges(Function &F,
+                               const CriticalEdgeSplittingOptions &Options =
+                                   CriticalEdgeSplittingOptions());
 
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
-BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, Pass *P);
+/// SplitEdge - Split the edge connecting the specified blocks.
+BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
+                      DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
 
 /// SplitBlock - Split the specified block at the specified instruction -
 /// everything before SplitPt stays in Old and everything starting with SplitPt
 /// moves to a new block. The two blocks are joined by an unconditional branch
 /// and the loop info is updated.
 ///
-BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
+BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+                       DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
 
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'. This function returns the new block.
+/// SplitBlockPredecessors - This method introduces at least one new basic block
+/// into the function and moves some of the predecessors of BB to be
+/// predecessors of the new block. The new predecessors are indicated by the
+/// Preds array. The new block is given a suffix of 'Suffix'. Returns the new
+/// basic block to which predecessors from Preds now point.
+///
+/// If BB is a landingpad block then an additional basic block might be
+/// introduced. It will have the suffix Suffix+".split_lp". See
+/// SplitLandingPadPredecessors for more details on this case.
 ///
 /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
 /// DominanceFrontier, LoopInfo, and LCSSA but no other analyses.
@@ -161,8 +218,12 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
 /// complicated to handle the case where one of the edges being split
 /// is an exit of a loop with other exits).
 ///
-BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock*> Preds,
-                                   const char *Suffix, Pass *P = nullptr);
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+                                   const char *Suffix,
+                                   AliasAnalysis *AA = nullptr,
+                                   DominatorTree *DT = nullptr,
+                                   LoopInfo *LI = nullptr,
+                                   bool PreserveLCSSA = false);
 
 /// SplitLandingPadPredecessors - This method transforms the landing pad,
 /// OrigBB, by introducing two new basic blocks into the function. One of those
 ///
@@ -177,9 +238,14 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock*> Preds,
 /// case where one of the edges being split is an exit of a loop with other
 /// exits).
 ///
-void SplitLandingPadPredecessors(BasicBlock *OrigBB,ArrayRef<BasicBlock*> Preds,
+void SplitLandingPadPredecessors(BasicBlock *OrigBB,
+                                 ArrayRef<BasicBlock *> Preds,
                                  const char *Suffix, const char *Suffix2,
-                                 Pass *P, SmallVectorImpl<BasicBlock*> &NewBBs);
+                                 SmallVectorImpl<BasicBlock *> &NewBBs,
+                                 AliasAnalysis *AA = nullptr,
+                                 DominatorTree *DT = nullptr,
+                                 LoopInfo *LI = nullptr,
+                                 bool PreserveLCSSA = false);
 
 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
 /// instruction into a predecessor which ends in an unconditional branch. If
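The options struct replaces the old Pass* plus boolean parameters, and its setters chain. A sketch (not from the patch):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    unsigned splitAllEdges(Function &F, DominatorTree &DT, LoopInfo &LI) {
      // Preserve DT and LI; AA is deliberately omitted here.
      CriticalEdgeSplittingOptions Options(&DT, &LI);
      Options.setMergeIdenticalEdges().setPreserveLCSSA();
      return SplitAllCriticalEdges(F, Options);
    }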
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 6387c166ec71d..879f295caf0cc 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -28,52 +28,50 @@ namespace llvm {
   /// EmitStrLen - Emit a call to the strlen function to the builder, for the
   /// specified pointer. Ptr is required to be some pointer type, and the
   /// return value has 'intptr_t' type.
-  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                     const TargetLibraryInfo *TLI);
 
   /// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
   /// specified pointer. Ptr is required to be some pointer type, MaxLen must
   /// be of size_t type, and the return value has 'intptr_t' type.
   Value *EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
-                     const DataLayout *TD, const TargetLibraryInfo *TLI);
+                     const DataLayout &DL, const TargetLibraryInfo *TLI);
 
   /// EmitStrChr - Emit a call to the strchr function to the builder, for the
   /// specified pointer and character. Ptr is required to be some pointer type,
   /// and the return value has 'i8*' type.
-  Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const DataLayout *TD,
+  Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
                     const TargetLibraryInfo *TLI);
 
   /// EmitStrNCmp - Emit a call to the strncmp function to the builder.
   Value *EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
-                     const DataLayout *TD, const TargetLibraryInfo *TLI);
+                     const DataLayout &DL, const TargetLibraryInfo *TLI);
 
   /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
   /// specified pointer arguments.
   Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                    const DataLayout *TD, const TargetLibraryInfo *TLI,
-                    StringRef Name = "strcpy");
+                    const TargetLibraryInfo *TLI, StringRef Name = "strcpy");
 
   /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
   /// specified pointer arguments and length.
   Value *EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                     const DataLayout *TD, const TargetLibraryInfo *TLI,
-                     StringRef Name = "strncpy");
+                     const TargetLibraryInfo *TLI, StringRef Name = "strncpy");
 
   /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
   /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
   /// are pointers.
   Value *EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
-                       IRBuilder<> &B, const DataLayout *TD,
+                       IRBuilder<> &B, const DataLayout &DL,
                        const TargetLibraryInfo *TLI);
 
   /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
   /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
   Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
-                    const DataLayout *TD, const TargetLibraryInfo *TLI);
+                    const DataLayout &DL, const TargetLibraryInfo *TLI);
 
   /// EmitMemCmp - Emit a call to the memcmp function.
   Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
-                    const DataLayout *TD, const TargetLibraryInfo *TLI);
+                    const DataLayout &DL, const TargetLibraryInfo *TLI);
 
   /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
   /// (e.g. 'floor'). This function is known to take a single of type matching
@@ -93,28 +91,26 @@ namespace llvm {
 
   /// EmitPutChar - Emit a call to the putchar function. This assumes that Char
   /// is an integer.
-  Value *EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
-                     const TargetLibraryInfo *TLI);
+  Value *EmitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI);
 
   /// EmitPutS - Emit a call to the puts function. This assumes that Str is
   /// some pointer.
-  Value *EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
-                  const TargetLibraryInfo *TLI);
+  Value *EmitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI);
 
   /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
   /// an i32, and File is a pointer to FILE.
   Value *EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
-                   const DataLayout *TD, const TargetLibraryInfo *TLI);
+                   const TargetLibraryInfo *TLI);
 
   /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
   /// pointer and File is a pointer to FILE.
-  Value *EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const DataLayout *TD,
+  Value *EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
                    const TargetLibraryInfo *TLI);
 
   /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
   /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
   Value *EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
-                    const DataLayout *TD, const TargetLibraryInfo *TLI);
+                    const DataLayout &DL, const TargetLibraryInfo *TLI);
 }
 
 #endif
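Callers now thread a DataLayout reference through instead of a nullable pointer; one place to obtain it is the module. A sketch, assuming the 3.7-era Module::getDataLayout() that returns a reference (illustrative only):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    Value *emitLengthOf(Value *Str, Instruction *InsertPt, Module &M,
                        const TargetLibraryInfo *TLI) {
      IRBuilder<> B(InsertPt); // Insert the strlen call before InsertPt.
      return EmitStrLen(Str, B, M.getDataLayout(), TLI);
    }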
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 6584abe822d80..cb187ec103d0d 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -95,8 +95,7 @@ struct ClonedCodeInfo {
 /// function, you can specify a ClonedCodeInfo object with the optional fifth
 /// parameter.
 ///
-BasicBlock *CloneBasicBlock(const BasicBlock *BB,
-                            ValueToValueMapTy &VMap,
+BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
                             const Twine &NameSuffix = "", Function *F = nullptr,
                             ClonedCodeInfo *CodeInfo = nullptr);
 
@@ -112,8 +111,7 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
 /// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
 /// mappings, and debug info metadata will not be cloned.
 ///
-Function *CloneFunction(const Function *F,
-                        ValueToValueMapTy &VMap,
+Function *CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                         bool ModuleLevelChanges,
                         ClonedCodeInfo *CodeInfo = nullptr);
 
@@ -127,14 +125,50 @@ Function *CloneFunction(const Function *F,
 /// mappings.
 ///
 void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                       ValueToValueMapTy &VMap,
-                       bool ModuleLevelChanges,
+                       ValueToValueMapTy &VMap, bool ModuleLevelChanges,
                        SmallVectorImpl<ReturnInst*> &Returns,
                        const char *NameSuffix = "",
                        ClonedCodeInfo *CodeInfo = nullptr,
                        ValueMapTypeRemapper *TypeMapper = nullptr,
                        ValueMaterializer *Materializer = nullptr);
 
+/// A helper class used with CloneAndPruneIntoFromInst to change the default
+/// behavior while instructions are being cloned.
+class CloningDirector {
+public:
+  /// This enumeration describes the way CloneAndPruneIntoFromInst should
+  /// proceed after the CloningDirector has examined an instruction.
+  enum CloningAction {
+    ///< Continue cloning the instruction (default behavior).
+    CloneInstruction,
+    ///< Skip this instruction but continue cloning the current basic block.
+    SkipInstruction,
+    ///< Skip this instruction and stop cloning the current basic block.
+    StopCloningBB,
+    ///< Don't clone the terminator but clone the current block's successors.
+    CloneSuccessors
+  };
+
+  virtual ~CloningDirector() {}
+
+  /// Subclasses must override this function to customize cloning behavior.
+  virtual CloningAction handleInstruction(ValueToValueMapTy &VMap,
+                                          const Instruction *Inst,
+                                          BasicBlock *NewBB) = 0;
+
+  virtual ValueMapTypeRemapper *getTypeRemapper() { return nullptr; }
+  virtual ValueMaterializer *getValueMaterializer() { return nullptr; }
+};
+
+void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+                               const Instruction *StartingInst,
+                               ValueToValueMapTy &VMap, bool ModuleLevelChanges,
+                               SmallVectorImpl<ReturnInst*> &Returns,
+                               const char *NameSuffix = "",
+                               ClonedCodeInfo *CodeInfo = nullptr,
+                               CloningDirector *Director = nullptr);
+
+
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly. The
 /// effect of this is to copy significantly less code in cases where (for
@@ -147,12 +181,10 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 /// mappings.
 ///
 void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                               ValueToValueMapTy &VMap,
-                               bool ModuleLevelChanges,
+                               ValueToValueMapTy &VMap, bool ModuleLevelChanges,
                                SmallVectorImpl<ReturnInst*> &Returns,
                                const char *NameSuffix = "",
                                ClonedCodeInfo *CodeInfo = nullptr,
-                               const DataLayout *DL = nullptr,
                                Instruction *TheCall = nullptr);
 
 /// InlineFunctionInfo - This class captures the data input to the
@@ -160,21 +192,19 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
 class InlineFunctionInfo {
 public:
   explicit InlineFunctionInfo(CallGraph *cg = nullptr,
-                              const DataLayout *DL = nullptr,
                               AliasAnalysis *AA = nullptr,
                               AssumptionCacheTracker *ACT = nullptr)
-      : CG(cg), DL(DL), AA(AA), ACT(ACT) {}
+      : CG(cg), AA(AA), ACT(ACT) {}
 
   /// CG - If non-null, InlineFunction will update the callgraph to reflect the
   /// changes it makes.
   CallGraph *CG;
-  const DataLayout *DL;
   AliasAnalysis *AA;
   AssumptionCacheTracker *ACT;
 
   /// StaticAllocas - InlineFunction fills this in with all static allocas that
   /// get copied into the caller.
-  SmallVector<AllocaInst*, 4> StaticAllocas;
+  SmallVector<AllocaInst *, 4> StaticAllocas;
 
   /// InlinedCalls - InlineFunction fills this in with callsites that were
   /// inlined from the callee. This is only filled in if CG is non-null.
@@ -196,9 +226,12 @@ public:
 /// exists in the instruction stream. Similarly this will inline a recursive
 /// function by one level.
 ///
-bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, bool InsertLifetime = true);
-bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, bool InsertLifetime = true);
-bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime = true);
+bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
+                    bool InsertLifetime = true);
+bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+                    bool InsertLifetime = true);
+bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+                    bool InsertLifetime = true);
 
 } // End llvm namespace
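A CloningDirector subclass only has to implement handleInstruction. The sketch below (hypothetical, not from the patch) stops cloning a block when it reaches an llvm.donothing marker call:

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    class StopAtMarkerDirector : public CloningDirector {
    public:
      CloningAction handleInstruction(ValueToValueMapTy &VMap,
                                      const Instruction *Inst,
                                      BasicBlock *NewBB) override {
        if (auto *II = dyn_cast<IntrinsicInst>(Inst))
          if (II->getIntrinsicID() == Intrinsic::donothing)
            return StopCloningBB; // Truncate this block here.
        return CloneInstruction;  // Default: clone everything else.
      }
    };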
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 14844117f44f8..a1bb367ac7b67 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -16,6 +16,7 @@
 #define LLVM_TRANSFORMS_UTILS_LOCAL_H
 
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Operator.h"
@@ -31,7 +32,6 @@ class DbgDeclareInst;
 class StoreInst;
 class LoadInst;
 class Value;
-class Pass;
 class PHINode;
 class AllocaInst;
 class AssumptionCache;
@@ -89,7 +89,7 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN,
 ///
 /// This returns true if it changed the code, note that it can delete
 /// instructions in other blocks as well as in this block.
-bool SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD = nullptr,
+bool SimplifyInstructionsInBlock(BasicBlock *BB,
                                  const TargetLibraryInfo *TLI = nullptr);
 
 //===----------------------------------------------------------------------===//
@@ -107,15 +107,14 @@ bool SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD = nullptr,
 ///
 /// .. and delete the predecessor corresponding to the '1', this will attempt to
 /// recursively fold the 'and' to 0.
-void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
-                                  DataLayout *TD = nullptr);
+void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred);
 
 /// MergeBasicBlockIntoOnlyPred - BB is a block with one predecessor and its
 /// predecessor is known to have one successor (BB!). Eliminate the edge
 /// between them, moving the instructions in the predecessor into BB. This
 /// deletes the predecessor block.
 ///
-void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = nullptr);
+void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr);
 
 /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
 /// unconditional branch, and contains no instructions other than PHI nodes,
@@ -138,8 +137,7 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
 /// the basic block that was pointed to.
 ///
 bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
-                 unsigned BonusInstThreshold, const DataLayout *TD = nullptr,
-                 AssumptionCache *AC = nullptr);
+                 unsigned BonusInstThreshold, AssumptionCache *AC = nullptr);
 
 /// FlattenCFG - This function is used to flatten a CFG. For
 /// example, it uses parallel-and and parallel-or mode to collapse
 //
@@ -151,8 +149,7 @@ bool FlattenCFG(BasicBlock *BB, AliasAnalysis *AA = nullptr);
 /// and if a predecessor branches to us and one of our successors, fold the
 /// setcc into the predecessor and use logical operations to pick the right
 /// destination.
-bool FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL = nullptr,
-                            unsigned BonusInstThreshold = 1);
+bool FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold = 1);
 
 /// DemoteRegToStack - This function takes a virtual register computed by an
 /// Instruction and replaces it with a slot in the stack frame, allocated via
 ///
@@ -174,18 +171,17 @@ AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = nullptr);
 /// and it is more than the alignment of the ultimate object, see if we can
 /// increase the alignment of the ultimate object, making this check succeed.
 unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
-                                    const DataLayout *TD = nullptr,
-                                    AssumptionCache *AC = nullptr,
+                                    const DataLayout &DL,
                                     const Instruction *CxtI = nullptr,
+                                    AssumptionCache *AC = nullptr,
                                     const DominatorTree *DT = nullptr);
 
 /// getKnownAlignment - Try to infer an alignment for the specified pointer.
-static inline unsigned getKnownAlignment(Value *V,
-                                         const DataLayout *TD = nullptr,
-                                         AssumptionCache *AC = nullptr,
+static inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
                                          const Instruction *CxtI = nullptr,
+                                         AssumptionCache *AC = nullptr,
                                          const DominatorTree *DT = nullptr) {
-  return getOrEnforceKnownAlignment(V, 0, TD, AC, CxtI, DT);
+  return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT);
 }
 
 /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
 /// code necessary to compute the offset from the base pointer (without adding
 /// in the base pointer). Return the result as a signed integer of intptr size.
 /// When NoAssumptions is true, no assumptions about index computation not
 /// overflowing are made.
-template<typename IRBuilderTy>
-Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
+template <typename IRBuilderTy>
+Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
                      bool NoAssumptions = false) {
   GEPOperator *GEPOp = cast<GEPOperator>(GEP);
-  Type *IntPtrTy = TD.getIntPtrType(GEP->getType());
+  Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
   Value *Result = Constant::getNullValue(IntPtrTy);
 
   // If the GEP is inbounds, we know that none of the addressing operations will
@@ -212,7 +208,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
   for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
        ++i, ++GTI) {
     Value *Op = *i;
-    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+    uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
     if (Constant *OpC = dyn_cast<Constant>(Op)) {
       if (OpC->isZeroValue())
         continue;
@@ -223,7 +219,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
       OpC = OpC->getSplatValue();
 
       uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue();
-      Size = TD.getStructLayout(STy)->getElementOffset(OpValue);
+      Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
 
       if (Size)
         Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
@@ -275,10 +271,11 @@ bool LowerDbgDeclare(Function &F);
 /// an alloca, if any.
 DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
 
-/// replaceDbgDeclareForAlloca - Replaces llvm.dbg.declare instruction when
-/// alloca is replaced with a new value.
+/// \brief Replaces llvm.dbg.declare instruction when an alloca is replaced with
+/// a new value. If Deref is true, an additional DW_OP_deref is prepended to
+/// the expression.
 bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
-                                DIBuilder &Builder);
+                                DIBuilder &Builder, bool Deref);
 
 /// \brief Remove all blocks that can not be reached from the function's entry.
 ///
@@ -290,6 +287,10 @@ bool removeUnreachableBlocks(Function &F);
 /// Metadata not listed as known via KnownIDs is removed
 void combineMetadata(Instruction *K, const Instruction *J,
                      ArrayRef<unsigned> KnownIDs);
+/// \brief Replace each use of 'From' with 'To' if that use is dominated by
+/// the given edge. Returns the number of replacements made.
+unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
+                                  const BasicBlockEdge &Edge);
 
 } // End llvm namespace
 
 #endif
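The new helper pairs naturally with edge splitting: after carving out an edge, uses dominated by it can be rewritten. A sketch (not from the patch):

    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    unsigned rewriteUsesOnEdge(Value *From, Value *To, DominatorTree &DT,
                               BasicBlock *Src, BasicBlock *Dst) {
      // Only uses reachable solely via the Src->Dst edge see the replacement.
      return replaceDominatedUsesWith(From, To, DT, BasicBlockEdge(Src, Dst));
    }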
+/// This class holds information about reduction variables. +class ReductionDescriptor { + +public: + /// This enum represents the kinds of reductions that we support. + enum ReductionKind { + RK_NoReduction, ///< Not a reduction. + RK_IntegerAdd, ///< Sum of integers. + RK_IntegerMult, ///< Product of integers. + RK_IntegerOr, ///< Bitwise or logical OR of numbers. + RK_IntegerAnd, ///< Bitwise or logical AND of numbers. + RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). + RK_FloatAdd, ///< Sum of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). + }; + + ReductionDescriptor() + : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoReduction), + MinMaxKind(ReductionInstDesc::MRK_Invalid) {} + + ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, + ReductionInstDesc::MinMaxReductionKind MK) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} + + /// Returns a struct describing if the instruction 'I' can be a reduction + /// variable of type 'Kind'. If the reduction is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// the 'PatternLastInst' member of the returned struct. + static ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind, + ReductionInstDesc &Prev, + bool HasFunNoNaNAttr); + + /// Returns true if instruction I has multiple uses in Insts. + static bool hasMultipleUsesOf(Instruction *I, + SmallPtrSetImpl<Instruction *> &Insts); + + /// Returns true if all uses of the instruction I are within the Set. + static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set); + + /// Returns a struct describing if the instruction is a + /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) + /// or max(X, Y). + static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev); + + /// Returns the identity corresponding to the ReductionKind. + static Constant *getReductionIdentity(ReductionKind K, Type *Tp); + + /// Returns the opcode of the binary operation corresponding to the + /// ReductionKind. + static unsigned getReductionBinOp(ReductionKind Kind); + + /// Returns a Min/Max operation corresponding to MinMaxReductionKind. + static Value *createMinMaxOp(IRBuilder<> &Builder, + ReductionInstDesc::MinMaxReductionKind RK, + Value *Left, Value *Right); + + /// Returns true if Phi is a reduction of type Kind and adds it to the + /// ReductionDescriptor. + static bool AddReductionVar(PHINode *Phi, ReductionKind Kind, Loop *TheLoop, + bool HasFunNoNaNAttr, + ReductionDescriptor &RedDes); + + /// Returns true if Phi is a reduction in TheLoop. The ReductionDescriptor is + /// returned in RedDes. + static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, + ReductionDescriptor &RedDes); + + ReductionKind getReductionKind() { return Kind; } + + ReductionInstDesc::MinMaxReductionKind getMinMaxReductionKind() { + return MinMaxKind; + } + + TrackingVH<Value> getReductionStartValue() { return StartValue; } + + Instruction *getLoopExitInstr() { return LoopExitInstr; } + +private: + // The starting value of the reduction. + // It does not have to be zero! + TrackingVH<Value> StartValue; + // The instruction whose value is used outside the loop. + Instruction *LoopExitInstr; + // The kind of the reduction. + ReductionKind Kind; + // If this is a min/max reduction, the kind of reduction. + ReductionInstDesc::MinMaxReductionKind MinMaxKind; +}; BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); @@ -35,7 +181,6 @@ BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); /// passed into it. bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr, - const DataLayout *DL = nullptr, AssumptionCache *AC = nullptr);
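To show how the ReductionDescriptor entry point above is meant to be used, here is a sketch of a client querying a loop-header PHI; the surrounding function is hypothetical and not part of this patch.

#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Sketch: ask whether Phi is a reduction in TheLoop; on success the
// descriptor filled in by isReductionPHI exposes the reduction kind, the
// start value, and the instruction whose value escapes the loop.
static void inspectPhi(PHINode *Phi, Loop *TheLoop) {
  ReductionDescriptor RedDes;
  if (ReductionDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
    ReductionDescriptor::ReductionKind Kind = RedDes.getReductionKind();
    Instruction *ExitInstr = RedDes.getLoopExitInstr();
    (void)Kind;
    (void)ExitInstr;
  }
}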
/// \brief Put loop into LCSSA form. @@ -63,6 +208,53 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE = nullptr); + +/// \brief Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in +/// reverse depth first order w.r.t. the DominatorTree. This allows us to visit +/// uses before definitions, allowing us to sink a loop body in one pass without +/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, +/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the +/// loop and loop safety information as arguments. Returns true if any changes +/// were made. +bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, + TargetLibraryInfo *, Loop *, AliasSetTracker *, + LICMSafetyInfo *); + +/// \brief Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in depth +/// first order w.r.t. the DominatorTree. This allows us to visit definitions +/// before uses, allowing us to hoist a loop body in one pass without iteration. +/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, +/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the +/// loop and loop safety information as arguments. Returns true if any changes +/// were made. +bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, + TargetLibraryInfo *, Loop *, AliasSetTracker *, + LICMSafetyInfo *); + +/// \brief Try to promote memory values to scalars by sinking stores out of +/// the loop and moving loads to before the loop. We do this by looping over +/// the stores in the loop, looking for stores to Must pointers which are +/// loop invariant. It takes an AliasSet, a vector of loop exit blocks, a +/// vector of loop exit block insertion points, a PredIteratorCache, LoopInfo, +/// DominatorTree, Loop, AliasSet information for all instructions of the loop +/// and loop safety information as arguments. Returns true if any changes were +/// made. +bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &, + SmallVectorImpl<Instruction*> &, + PredIteratorCache &, LoopInfo *, + DominatorTree *, Loop *, AliasSetTracker *, + LICMSafetyInfo *); + +/// \brief Computes safety information for a loop. It checks the loop body and +/// header for the possibility of instructions that may throw an exception; it +/// takes a LICMSafetyInfo and a loop as arguments and updates the safety +/// information in the LICMSafetyInfo argument. +void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *);
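The LICM helpers above are designed to compose. The following sketch shows the intended call order; the scaffolding function and its parameters are assumed to be set up by the calling pass and are not part of this patch.

#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Sketch: compute may-throw safety information once, then sink and hoist in
// single passes over the region rooted at N.
static bool runLICMHelpers(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                           DominatorTree *DT, TargetLibraryInfo *TLI, Loop *L,
                           AliasSetTracker *AST) {
  LICMSafetyInfo SafetyInfo;
  computeLICMSafetyInfo(&SafetyInfo, L);
  bool Changed = sinkRegion(N, AA, LI, DT, TLI, L, AST, &SafetyInfo);
  Changed |= hoistRegion(N, AA, LI, DT, TLI, L, AST, &SafetyInfo);
  return Changed;
}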
+/// \brief Checks if the given PHINode in a loop header is an induction +/// variable. Returns true if this is an induction PHI along with the step +/// value. +bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&); } #endif diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h index 16904f16e3698..622265bae143b 100644 --- a/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/include/llvm/Transforms/Utils/ModuleUtils.h @@ -14,12 +14,19 @@ #ifndef LLVM_TRANSFORMS_UTILS_MODULEUTILS_H #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H +#include "llvm/ADT/ArrayRef.h" +#include <utility> // for std::pair + namespace llvm { class Module; class Function; class GlobalValue; class GlobalVariable; +class Constant; +class StringRef; +class Value; +class Type; template <class PtrType> class SmallPtrSetImpl; /// Append F to the list of global ctors of module M with the given Priority. @@ -36,6 +43,20 @@ void appendToGlobalDtors(Module &M, Function *F, int Priority); GlobalVariable *collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed); + +// Validate the result of Module::getOrInsertFunction called for an interface +// function of a given sanitizer. If the instrumented module defines a function +// with the same name, their prototypes must match; otherwise +// getOrInsertFunction returns a bitcast. +Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast); + +/// \brief Creates a sanitizer constructor function, and calls the sanitizer's +/// init function from it. +/// \return A pair of pointers to the constructor and init functions, +/// respectively. +std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions( + Module &M, StringRef CtorName, StringRef InitName, + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs); } // End llvm namespace #endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h index 7874a5fd8119a..1c7b2c587a36a 100644 --- a/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/include/llvm/Transforms/Utils/SSAUpdater.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -118,8 +119,8 @@ public: private: Value *GetValueAtEndOfBlockInternal(BasicBlock *BB); - void operator=(const SSAUpdater&) LLVM_DELETED_FUNCTION; - SSAUpdater(const SSAUpdater&) LLVM_DELETED_FUNCTION; + void operator=(const SSAUpdater&) = delete; + SSAUpdater(const SSAUpdater&) = delete; }; /// \brief Helper class for promoting a collection of loads and stores into SSA @@ -135,7 +136,7 @@ protected: SSAUpdater &SSA; public: - LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, + LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, SSAUpdater &S, StringRef Name = StringRef()); virtual ~LoadAndStorePromoter() {}
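A sketch of the new ModuleUtils helpers above in use; the constructor and init function names are illustrative only, not an API defined by this patch.

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <tuple>
using namespace llvm;

// Sketch: create a module constructor that calls a hypothetical runtime init
// function "__mysan_init" with no arguments, then register the constructor so
// the runtime is initialized at load time.
static void installSanitizerCtor(Module &M) {
  Function *Ctor, *InitFn;
  std::tie(Ctor, InitFn) = createSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"mysan.module_ctor", /*InitName=*/"__mysan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{});
  (void)InitFn;
  appendToGlobalCtors(M, Ctor, /*Priority=*/0);
}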
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 70ef0eb765ddd..41159603aae5e 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -15,9 +15,10 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Target/TargetLibraryInfo.h" namespace llvm { class Value; @@ -36,12 +37,11 @@ class Function; /// is unknown) by passing true for OnlyLowerUnknownSize. class FortifiedLibCallSimplifier { private: - const DataLayout *DL; const TargetLibraryInfo *TLI; bool OnlyLowerUnknownSize; public: - FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI, + FortifiedLibCallSimplifier(const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize = false); /// \brief Take the given call instruction and return a more @@ -71,15 +71,24 @@ private: class LibCallSimplifier { private: FortifiedLibCallSimplifier FortifiedSimplifier; - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; bool UnsafeFPShrink; + function_ref<void(Instruction *, Value *)> Replacer; -protected: - ~LibCallSimplifier() {} + /// \brief Internal wrapper for RAUW that is the default implementation. + /// + /// Other users may provide an alternate function with this signature instead + /// of this one. + static void replaceAllUsesWithDefault(Instruction *I, Value *With); + + /// \brief Replace an instruction's uses with a value using our replacer. + void replaceAllUsesWith(Instruction *I, Value *With); public: - LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI); + LibCallSimplifier(const DataLayout &DL, const TargetLibraryInfo *TLI, + function_ref<void(Instruction *, Value *)> Replacer = + &replaceAllUsesWithDefault); /// optimizeCall - Take the given call instruction and return a more /// optimal value to replace the instruction with or 0 if a more @@ -90,11 +99,6 @@ public: /// The call must not be an indirect call. Value *optimizeCall(CallInst *CI); - /// replaceAllUsesWith - This method is used when the library call - /// simplifier needs to replace instructions other than the library - /// call being modified. - virtual void replaceAllUsesWith(Instruction *I, Value *With) const; - private: // String and Memory Library Call Optimizations Value *optimizeStrCat(CallInst *CI, IRBuilder<> &B); @@ -112,6 +116,7 @@ private: Value *optimizeStrSpn(CallInst *CI, IRBuilder<> &B); Value *optimizeStrCSpn(CallInst *CI, IRBuilder<> &B); Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
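The new function_ref parameter above lets a client observe or reroute replacements instead of relying on plain RAUW. Here is a sketch of a caller supplying its own replacer; the wrapper function is hypothetical, and the lambda body only mirrors what a default replacer might do.

#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;

// Sketch: a client-provided replacement callback. The lambda is named so it
// outlives the simplifier, since function_ref does not own its callee.
static Value *trySimplifyLibCall(CallInst *CI, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI) {
  auto Replacer = [](Instruction *I, Value *With) {
    // A pass could instead push I onto its own worklist before erasing it.
    I->replaceAllUsesWith(With);
    I->eraseFromParent();
  };
  LibCallSimplifier Simplifier(DL, TLI, Replacer);
  // Returns a replacement value, or nullptr if no simplification applies.
  return Simplifier.optimizeCall(CI);
}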
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h index af79372657959..5ccee98f97e7b 100644 --- a/include/llvm/Transforms/Utils/SymbolRewriter.h +++ b/include/llvm/Transforms/Utils/SymbolRewriter.h @@ -60,10 +60,10 @@ namespace SymbolRewriter { /// select the symbols to rewrite. This descriptor list is passed to the /// SymbolRewriter pass. class RewriteDescriptor : public ilist_node<RewriteDescriptor> { - RewriteDescriptor(const RewriteDescriptor &) LLVM_DELETED_FUNCTION; + RewriteDescriptor(const RewriteDescriptor &) = delete; const RewriteDescriptor & - operator=(const RewriteDescriptor &) LLVM_DELETED_FUNCTION; + operator=(const RewriteDescriptor &) = delete; public: enum class Type { @@ -90,9 +90,6 @@ typedef iplist<RewriteDescriptor> RewriteDescriptorList; class RewriteMapParser { public: - RewriteMapParser() {} - ~RewriteMapParser() {} - bool parse(const std::string &MapFile, RewriteDescriptorList *Descriptors); private: diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 807367cd4b129..7f2cf8d7f59e5 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -16,20 +16,27 @@ #ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H #define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class AssumptionCache; class Loop; class LoopInfo; class LPPassManager; +class MDNode; class Pass; bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, - unsigned TripMultiple, LoopInfo *LI, Pass *PP, - LPPassManager *LPM, AssumptionCache *AC); + bool AllowExpensiveTripCount, unsigned TripMultiple, + LoopInfo *LI, Pass *PP, LPPassManager *LPM, + AssumptionCache *AC); + +bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, LoopInfo *LI, + LPPassManager *LPM); -bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, - LPPassManager* LPM); +MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } #endif diff --git a/include/llvm/Transforms/Utils/VectorUtils.h b/include/llvm/Transforms/Utils/VectorUtils.h index b47acf517b37c..9f0fb19d667a1 100644 --- a/include/llvm/Transforms/Utils/VectorUtils.h +++ b/include/llvm/Transforms/Utils/VectorUtils.h @@ -14,9 +14,9 @@ #ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Target/TargetLibraryInfo.h" namespace llvm { @@ -59,7 +59,7 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) { } } -static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, +static inline bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx) { switch (ID) { case Intrinsic::ctlz:
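Finally, a sketch of GetUnrollMetadata introduced above, used to test a loop for an unroll pragma. The helper function is hypothetical, while "llvm.loop.unroll.disable" follows the existing llvm.loop.unroll.* metadata naming convention.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;

// Sketch: look up a named operand in the loop's ID metadata. GetUnrollMetadata
// returns the matching MDNode, or nullptr if the loop carries no such entry.
static bool hasUnrollDisablePragma(const Loop *L) {
  if (MDNode *LoopID = L->getLoopID())
    return GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable") != nullptr;
  return false;
}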