15 files changed, 185 insertions, 42 deletions
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 549021e0e6e62..573fcb822aba2 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -30,7 +30,8 @@ from now, will be version 5.0.0.
 
 Non-comprehensive list of changes in this release
 =================================================
-* Minimum compiler version to build has been raised to GCC 4.8 and VS 2015.
+* The minimum compiler version required for building LLVM has been raised to
+  4.8 for GCC and 2015 for Visual Studio.
 
 * The C API functions ``LLVMAddFunctionAttr``, ``LLVMGetFunctionAttr``,
   ``LLVMRemoveFunctionAttr``, ``LLVMAddAttribute``, ``LLVMRemoveAttribute``,
@@ -56,15 +57,8 @@ Non-comprehensive list of changes in this release
   with LLVM option ``-adce-remove-loops`` when the loop body otherwise has
   no live operations.
 
-* The GVNHoist pass is now enabled by default. The new pass based on Global
-  Value Numbering detects similar computations in branch code and replaces
-  multiple instances of the same computation with a unique expression.  The
-  transform benefits code size and generates better schedules.  GVNHoist is
-  more aggressive at ``-Os`` and ``-Oz``, hoisting more expressions at the
-  expense of execution time degradations.
-
- * The llvm-cov tool can now export coverage data as json. Its html output mode
-   has also improved.
+* The llvm-cov tool can now export coverage data as JSON. Its HTML output mode
+  has also improved.
 
 Improvements to ThinLTO (-flto=thin)
 ------------------------------------
@@ -225,6 +219,10 @@ Changes to the ARM Targets
 A lot of work has also been done in LLD for ARM, which now supports more
 relocations and TLS.
 
+Note: From the next release (5.0), the "vulcan" target will be renamed to
+"thunderx2t99", including command line options, assembly directives, etc. This
+release (4.0) will be the last one to accept "vulcan" as its name.
+
 Changes to the AVR Target
 -----------------------------
 
@@ -274,6 +272,15 @@ Changes to the MIPS Target
 * Fixed several crashes involving FastISel.
 * Corrected the corrected definitions for aui/daui/dahi/dati for MIPSR6.
 
+Changes to the X86 Target
+-------------------------
+
+**During this release the X86 target has:**
+
+* Added support for AMD Ryzen (znver1) CPUs.
+* Gained support for using VEX encoding on AVX-512 CPUs to reduce code size when possible.
+* Improved AVX-512 codegen.
+
 Changes to the OCaml bindings
 -----------------------------
 
@@ -299,6 +306,34 @@ x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
 and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
 are underway.
 
+Portable Computing Language (pocl)
+----------------------------------
+
+In addition to producing an easily portable open source OpenCL
+implementation, another major goal of `pocl <http://pocl.sourceforge.net/>`_
+is improving performance portability of OpenCL programs with
+compiler optimizations, reducing the need for target-dependent manual
+optimizations. An important part of pocl is a set of LLVM passes used to
+statically parallelize multiple work-items with the kernel compiler, even in
+the presence of work-group barriers. This enables static parallelization of
+the fine-grained static concurrency in the work groups in multiple ways.
+
+TTA-based Co-design Environment (TCE)
+-------------------------------------
+
+`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
+processors based on the Transport Triggered Architecture (TTA).
+The toolset provides a complete co-design flow from C/C++
+programs down to synthesizable VHDL/Verilog and parallel program binaries.
+Processor customization points include register files, function units,
+supported operations, and the interconnection network.
+
+TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
+optimizations and also for parts of code generation. It generates new
+LLVM-based code generators "on the fly" for the designed TTA processors and
+loads them into the compiler backend as runtime libraries to avoid
+per-target recompilation of larger parts of the compiler chain.
+
 
 Additional Information
 ======================
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index b3905cc01e84b..ed328f12c4639 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -127,10 +127,15 @@ static cl::opt<unsigned> MulOpsInlineThreshold(
     cl::desc("Threshold for inlining multiplication operands into a SCEV"),
     cl::init(1000));
 
-static cl::opt<unsigned>
-    MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
-                    cl::desc("Maximum depth of recursive compare complexity"),
-                    cl::init(32));
+static cl::opt<unsigned> MaxSCEVCompareDepth(
+    "scalar-evolution-max-scev-compare-depth", cl::Hidden,
+    cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
+    cl::init(32));
+
+static cl::opt<unsigned> MaxValueCompareDepth(
+    "scalar-evolution-max-value-compare-depth", cl::Hidden,
+    cl::desc("Maximum depth of recursive value complexity comparisons"),
+    cl::init(2));
 
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
@@ -481,7 +486,7 @@ static int
 CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
                        const LoopInfo *const LI, Value *LV, Value *RV,
                        unsigned Depth) {
-  if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+  if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
     return 0;
 
   // Order pointer values after integer values. This helps SCEVExpander form
@@ -568,7 +573,7 @@ static int CompareSCEVComplexity(
   if (LType != RType)
     return (int)LType - (int)RType;
 
-  if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+  if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
     return 0;
   // Aside from the getSCEVType() ordering, the particular ordering
   // isn't very important except that it's beneficial to be consistent,
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index d086ee05a64fe..941efb210d1c8 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold(
              "(default = 75)"));
 
 static cl::opt<bool> EnableGVNHoist(
-    "enable-gvn-hoist", cl::init(true), cl::Hidden,
-    cl::desc("Enable the GVN hoisting pass (default = on)"));
+    "enable-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass"));
 
 static cl::opt<bool>
     DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 90c26e13db78a..f8e1d2e1a08ab 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
 class GVNHoist {
 public:
   GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
-           MemorySSA *MSSA, bool OptForMinSize)
-      : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
-        HoistingGeps(OptForMinSize), HoistedCtr(0) {
-      // Hoist as far as possible when optimizing for code-size.
-      if (OptForMinSize)
-        MaxNumberOfBBSInPath = -1;
-  }
+           MemorySSA *MSSA)
+      : DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+        HoistingGeps(false),
+        HoistedCtr(0)
+  { }
 
   bool run(Function &F) {
     VN.setDomTree(DT);
@@ -251,7 +249,6 @@ private:
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
   MemorySSA *MSSA;
-  const bool OptForMinSize;
   const bool HoistingGeps;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
@@ -505,11 +502,6 @@ private:
   bool safeToHoistScalar(const BasicBlock *HoistBB,
                          SmallPtrSetImpl<const BasicBlock *> &WL,
                          int &NBBsOnAllPaths) {
-    // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
-    // where they are partially needed.
-    if (OptForMinSize)
-      return true;
-
     // Check that the hoisted expression is needed on all paths.
     if (!hoistingFromAllPaths(HoistBB, WL))
       return false;
@@ -923,13 +915,8 @@ private:
                 Intr->getIntrinsicID() == Intrinsic::assume)
               continue;
           }
-          if (Call->mayHaveSideEffects()) {
-            if (!OptForMinSize)
-              break;
-            // We may continue hoisting across calls which write to memory.
-            if (Call->mayThrow())
-              break;
-          }
+          if (Call->mayHaveSideEffects())
+            break;
 
           if (Call->isConvergent())
             break;
@@ -971,7 +958,7 @@ public:
     auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
 
-    GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+    GVNHoist G(&DT, &AA, &MD, &MSSA);
     return G.run(F);
   }
 
@@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-  GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+  GVNHoist G(&DT, &AA, &MD, &MSSA);
   if (!G.run(F))
     return PreservedAnalyses::all();
 
diff --git a/test/DebugInfo/Inputs/split-dwarf-empty.o b/test/DebugInfo/Inputs/split-dwarf-empty.o
new file mode 100644
index 0000000000000..95e2ae1259152
--- /dev/null
+++ b/test/DebugInfo/Inputs/split-dwarf-empty.o
diff --git a/test/Feature/optnone-opt.ll b/test/Feature/optnone-opt.ll
index a00013ec17977..f53877d4aea9f 100644
--- a/test/Feature/optnone-opt.ll
+++ b/test/Feature/optnone-opt.ll
@@ -41,7 +41,6 @@ attributes #0 = { optnone noinline }
 ; OPT-O1-DAG: Skipping pass 'Combine redundant instructions'
 ; OPT-O1-DAG: Skipping pass 'Dead Store Elimination'
 ; OPT-O1-DAG: Skipping pass 'Early CSE'
-; OPT-O1-DAG: Skipping pass 'Early GVN Hoisting of Expressions'
 ; OPT-O1-DAG: Skipping pass 'Jump Threading'
 ; OPT-O1-DAG: Skipping pass 'MemCpy Optimization'
 ; OPT-O1-DAG: Skipping pass 'Reassociate expressions'
diff --git a/test/Object/Inputs/dynamic-reloc.so b/test/Object/Inputs/dynamic-reloc.so
new file mode 100755
index 0000000000000..8de35691ba086
--- /dev/null
+++ b/test/Object/Inputs/dynamic-reloc.so
diff --git a/test/Object/Inputs/macho-bad-archive1.a b/test/Object/Inputs/macho-bad-archive1.a
new file mode 100644
index 0000000000000..42ef43f59325c
--- /dev/null
+++ b/test/Object/Inputs/macho-bad-archive1.a
diff --git a/test/Object/Inputs/macho-bad-archive2.a b/test/Object/Inputs/macho-bad-archive2.a
new file mode 100644
index 0000000000000..006822f00019f
--- /dev/null
+++ b/test/Object/Inputs/macho-bad-archive2.a
diff --git a/test/Object/Inputs/macho-toc64-archive-x86_64.a b/test/Object/Inputs/macho-toc64-archive-x86_64.a
new file mode 100644
index 0000000000000..e73f3dce6bd4b
--- /dev/null
+++ b/test/Object/Inputs/macho-toc64-archive-x86_64.a
diff --git a/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll b/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
new file mode 100644
index 0000000000000..654d5b6a5585f
--- /dev/null
+++ b/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
@@ -0,0 +1,81 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+; Check that urem is not hoisted.
+; CHECK-LABEL: @main
+; CHECK: urem
+; CHECK: urem
+; CHECK: urem
+
+@g_x_s = global i32 -470211272, align 4
+@g_z_s = global i32 2007237709, align 4
+@g_x_u = global i32 282475249, align 4
+@g_z_u = global i32 984943658, align 4
+@g_m = global i32 16807, align 4
+@res = common global i32 0, align 4
+
+; Function Attrs:
+define i64 @func() #0 {
+entry:
+  ret i64 1
+}
+
+; Function Attrs:
+define i32 @main() {
+entry:
+  %0 = load volatile i32, i32* @g_x_s, align 4
+  %1 = load volatile i32, i32* @g_z_s, align 4
+  %2 = load volatile i32, i32* @g_x_u, align 4
+  %3 = load volatile i32, i32* @g_z_u, align 4
+  %4 = load volatile i32, i32* @g_m, align 4
+  %call = call i64 @func() #4
+  %conv = sext i32 %1 to i64
+  %cmp = icmp ne i64 %call, %conv
+  br i1 %cmp, label %if.end, label %lor.lhs.false
+
+lor.lhs.false:
+  %div = udiv i32 %4, %1
+  %rem = urem i32 %0, %div
+  %cmp2 = icmp eq i32 %rem, 0
+  br i1 %cmp2, label %if.end, label %if.then
+
+if.then:
+  br label %cleanup
+
+if.end:
+  %call4 = call i64 @func() #4
+  %conv5 = zext i32 %3 to i64
+  %cmp6 = icmp ne i64 %call4, %conv5
+  br i1 %cmp6, label %if.end14, label %lor.lhs.false8
+
+lor.lhs.false8:
+  %div9 = udiv i32 %4, %3
+  %rem10 = urem i32 %0, %div9
+  %cmp11 = icmp eq i32 %rem10, 0
+  br i1 %cmp11, label %if.end14, label %if.then13
+
+if.then13:
+  br label %cleanup
+
+if.end14:
+  %call15 = call i64 @func() #4
+  %cmp17 = icmp ne i64 %call15, %conv
+  br i1 %cmp17, label %if.end25, label %lor.lhs.false19
+
+lor.lhs.false19:
+  %div20 = udiv i32 %4, %1
+  %rem21 = urem i32 %0, %div20
+  %cmp22 = icmp eq i32 %rem21, 0
+  br i1 %cmp22, label %if.end25, label %if.then24
+
+if.then24:
+  br label %cleanup
+
+if.end25:
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i32 [ 0, %if.end25 ], [ 1, %if.then24 ], [ 1, %if.then13 ], [ 1, %if.then ]
+  ret i32 %retval.0
+}
+
+attributes #0 = { minsize noinline nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/tools/dsymutil/Inputs/common.macho.x86_64.o b/test/tools/dsymutil/Inputs/common.macho.x86_64.o
new file mode 100644
index 0000000000000..491009bc866e2
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/common.macho.x86_64.o
diff --git a/test/tools/dsymutil/Inputs/thumb.o b/test/tools/dsymutil/Inputs/thumb.o
new file mode 100644
index 0000000000000..8bac2a1e484c4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/thumb.o
diff --git a/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o b/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
new file mode 100644
index 0000000000000..ce159ad52f412
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index 752cc8128248e..f4370842edb5e 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -465,7 +465,7 @@ TEST_F(ScalarEvolutionsTest, CommutativeExprOperandOrder) {
     });
 }
 
-TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
+TEST_F(ScalarEvolutionsTest, CompareSCEVComplexity) {
   FunctionType *FTy =
       FunctionType::get(Type::getVoidTy(Context), std::vector<Type *>(), false);
   Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
@@ -532,5 +532,41 @@ TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
   EXPECT_NE(nullptr, SE.getSCEV(Acc[0]));
 }
 
+TEST_F(ScalarEvolutionsTest, CompareValueComplexity) {
+  IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(Context);
+  PointerType *IntPtrPtrTy = IntPtrTy->getPointerTo();
+
+  FunctionType *FTy =
+      FunctionType::get(Type::getVoidTy(Context), {IntPtrTy, IntPtrTy}, false);
+  Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+  BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
+
+  Value *X = &*F->arg_begin();
+  Value *Y = &*std::next(F->arg_begin());
+
+  const int ValueDepth = 10;
+  for (int i = 0; i < ValueDepth; i++) {
+    X = new LoadInst(new IntToPtrInst(X, IntPtrPtrTy, "", EntryBB), "",
+                     /*isVolatile*/ false, EntryBB);
+    Y = new LoadInst(new IntToPtrInst(Y, IntPtrPtrTy, "", EntryBB), "",
+                     /*isVolatile*/ false, EntryBB);
+  }
+
+  auto *MulA = BinaryOperator::CreateMul(X, Y, "", EntryBB);
+  auto *MulB = BinaryOperator::CreateMul(Y, X, "", EntryBB);
+  ReturnInst::Create(Context, nullptr, EntryBB);
+
+  // This test isn't checking for correctness.  Today making A and B resolve to
+  // the same SCEV would require deeper searching in CompareValueComplexity,
+  // which will slow down compilation.  However, this test can fail (with LLVM's
+  // behavior still being correct) if we ever have a smarter
+  // CompareValueComplexity that is both fast and more accurate.
+
+  ScalarEvolution SE = buildSE(*F);
+  auto *A = SE.getSCEV(MulA);
+  auto *B = SE.getSCEV(MulB);
+  EXPECT_NE(A, B);
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm