summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/ReleaseNotes.rst55
-rw-r--r--lib/Analysis/ScalarEvolution.cpp17
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp4
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp31
-rw-r--r--test/DebugInfo/Inputs/split-dwarf-empty.obin0 -> 1648 bytes
-rw-r--r--test/Feature/optnone-opt.ll1
-rwxr-xr-xtest/Object/Inputs/dynamic-reloc.sobin0 -> 2088 bytes
-rw-r--r--test/Object/Inputs/macho-bad-archive1.abin0 -> 5544 bytes
-rw-r--r--test/Object/Inputs/macho-bad-archive2.abin0 -> 1084 bytes
-rw-r--r--test/Object/Inputs/macho-toc64-archive-x86_64.abin0 -> 1576 bytes
-rw-r--r--test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll81
-rw-r--r--test/tools/dsymutil/Inputs/common.macho.x86_64.obin0 -> 2404 bytes
-rw-r--r--test/tools/dsymutil/Inputs/thumb.obin0 -> 1224 bytes
-rw-r--r--test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.obin0 -> 456 bytes
-rw-r--r--unittests/Analysis/ScalarEvolutionTest.cpp38
15 files changed, 185 insertions, 42 deletions
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 549021e0e6e62..573fcb822aba2 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -30,7 +30,8 @@ from now, will be version 5.0.0.
Non-comprehensive list of changes in this release
=================================================
-* Minimum compiler version to build has been raised to GCC 4.8 and VS 2015.
+* The minimum compiler version required for building LLVM has been raised to
+ 4.8 for GCC and 2015 for Visual Studio.
* The C API functions ``LLVMAddFunctionAttr``, ``LLVMGetFunctionAttr``,
``LLVMRemoveFunctionAttr``, ``LLVMAddAttribute``, ``LLVMRemoveAttribute``,
@@ -56,15 +57,8 @@ Non-comprehensive list of changes in this release
with LLVM option ``-adce-remove-loops`` when the loop body otherwise has
no live operations.
-* The GVNHoist pass is now enabled by default. The new pass based on Global
- Value Numbering detects similar computations in branch code and replaces
- multiple instances of the same computation with a unique expression. The
- transform benefits code size and generates better schedules. GVNHoist is
- more aggressive at ``-Os`` and ``-Oz``, hoisting more expressions at the
- expense of execution time degradations.
-
- * The llvm-cov tool can now export coverage data as json. Its html output mode
- has also improved.
+* The llvm-cov tool can now export coverage data as JSON. Its HTML output mode
+ has also been improved.
Improvements to ThinLTO (-flto=thin)
------------------------------------
@@ -225,6 +219,10 @@ Changes to the ARM Targets
A lot of work has also been done in LLD for ARM, which now supports more
relocations and TLS.
+Note: From the next release (5.0), the "vulcan" target will be renamed to
+"thunderx2t99", including command line options, assembly directives, etc. This
+release (4.0) will be the last one to accept "vulcan" as its name.
+
Changes to the AVR Target
-----------------------------
@@ -274,6 +272,15 @@ Changes to the MIPS Target
* Fixed several crashes involving FastISel.
* Corrected the corrected definitions for aui/daui/dahi/dati for MIPSR6.
+Changes to the X86 Target
+-------------------------
+
+**During this release the X86 target has:**
+
+* Added support for AMD Ryzen (znver1) CPUs.
+* Gained support for using VEX encoding on AVX-512 CPUs to reduce code size when possible.
+* Improved AVX-512 codegen.
+
Changes to the OCaml bindings
-----------------------------
@@ -299,6 +306,34 @@ x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
are underway.
+Portable Computing Language (pocl)
+----------------------------------
+
+In addition to producing an easily portable open source OpenCL
+implementation, another major goal of `pocl <http://pocl.sourceforge.net/>`_
+is improving performance portability of OpenCL programs with
+compiler optimizations, reducing the need for target-dependent manual
+optimizations. An important part of pocl is a set of LLVM passes used to
+statically parallelize multiple work-items with the kernel compiler, even in
+the presence of work-group barriers. This enables static parallelization of
+the fine-grained static concurrency in the work groups in multiple ways.
+
+TTA-based Co-design Environment (TCE)
+-------------------------------------
+
+`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
+processors based on the Transport Triggered Architecture (TTA).
+The toolset provides a complete co-design flow from C/C++
+programs down to synthesizable VHDL/Verilog and parallel program binaries.
+Processor customization points include register files, function units,
+supported operations, and the interconnection network.
+
+TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
+optimizations and also for parts of code generation. It generates new
+LLVM-based code generators "on the fly" for the designed TTA processors and
+loads them into the compiler backend as runtime libraries to avoid
+per-target recompilation of larger parts of the compiler chain.
+
Additional Information
======================
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index b3905cc01e84b..ed328f12c4639 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -127,10 +127,15 @@ static cl::opt<unsigned> MulOpsInlineThreshold(
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
cl::init(1000));
-static cl::opt<unsigned>
- MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
- cl::desc("Maximum depth of recursive compare complexity"),
- cl::init(32));
+static cl::opt<unsigned> MaxSCEVCompareDepth(
+ "scalar-evolution-max-scev-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
+ cl::init(32));
+
+static cl::opt<unsigned> MaxValueCompareDepth(
+ "scalar-evolution-max-value-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive value complexity comparisons"),
+ cl::init(2));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -481,7 +486,7 @@ static int
CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
const LoopInfo *const LI, Value *LV, Value *RV,
unsigned Depth) {
- if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+ if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
return 0;
// Order pointer values after integer values. This helps SCEVExpander form
@@ -568,7 +573,7 @@ static int CompareSCEVComplexity(
if (LType != RType)
return (int)LType - (int)RType;
- if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
return 0;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index d086ee05a64fe..941efb210d1c8 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(true), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass (default = on)"));
+ "enable-gvn-hoist", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 90c26e13db78a..f8e1d2e1a08ab 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
class GVNHoist {
public:
GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
- MemorySSA *MSSA, bool OptForMinSize)
- : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
- HoistingGeps(OptForMinSize), HoistedCtr(0) {
- // Hoist as far as possible when optimizing for code-size.
- if (OptForMinSize)
- MaxNumberOfBBSInPath = -1;
- }
+ MemorySSA *MSSA)
+ : DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+ HoistingGeps(false),
+ HoistedCtr(0)
+ { }
bool run(Function &F) {
VN.setDomTree(DT);
@@ -251,7 +249,6 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
MemorySSA *MSSA;
- const bool OptForMinSize;
const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
@@ -505,11 +502,6 @@ private:
bool safeToHoistScalar(const BasicBlock *HoistBB,
SmallPtrSetImpl<const BasicBlock *> &WL,
int &NBBsOnAllPaths) {
- // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
- // where they are partially needed.
- if (OptForMinSize)
- return true;
-
// Check that the hoisted expression is needed on all paths.
if (!hoistingFromAllPaths(HoistBB, WL))
return false;
@@ -923,13 +915,8 @@ private:
Intr->getIntrinsicID() == Intrinsic::assume)
continue;
}
- if (Call->mayHaveSideEffects()) {
- if (!OptForMinSize)
- break;
- // We may continue hoisting across calls which write to memory.
- if (Call->mayThrow())
- break;
- }
+ if (Call->mayHaveSideEffects())
+ break;
if (Call->isConvergent())
break;
@@ -971,7 +958,7 @@ public:
auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+ GVNHoist G(&DT, &AA, &MD, &MSSA);
return G.run(F);
}
@@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
AliasAnalysis &AA = AM.getResult<AAManager>(F);
MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+ GVNHoist G(&DT, &AA, &MD, &MSSA);
if (!G.run(F))
return PreservedAnalyses::all();
diff --git a/test/DebugInfo/Inputs/split-dwarf-empty.o b/test/DebugInfo/Inputs/split-dwarf-empty.o
new file mode 100644
index 0000000000000..95e2ae1259152
--- /dev/null
+++ b/test/DebugInfo/Inputs/split-dwarf-empty.o
Binary files differ
diff --git a/test/Feature/optnone-opt.ll b/test/Feature/optnone-opt.ll
index a00013ec17977..f53877d4aea9f 100644
--- a/test/Feature/optnone-opt.ll
+++ b/test/Feature/optnone-opt.ll
@@ -41,7 +41,6 @@ attributes #0 = { optnone noinline }
; OPT-O1-DAG: Skipping pass 'Combine redundant instructions'
; OPT-O1-DAG: Skipping pass 'Dead Store Elimination'
; OPT-O1-DAG: Skipping pass 'Early CSE'
-; OPT-O1-DAG: Skipping pass 'Early GVN Hoisting of Expressions'
; OPT-O1-DAG: Skipping pass 'Jump Threading'
; OPT-O1-DAG: Skipping pass 'MemCpy Optimization'
; OPT-O1-DAG: Skipping pass 'Reassociate expressions'
diff --git a/test/Object/Inputs/dynamic-reloc.so b/test/Object/Inputs/dynamic-reloc.so
new file mode 100755
index 0000000000000..8de35691ba086
--- /dev/null
+++ b/test/Object/Inputs/dynamic-reloc.so
Binary files differ
diff --git a/test/Object/Inputs/macho-bad-archive1.a b/test/Object/Inputs/macho-bad-archive1.a
new file mode 100644
index 0000000000000..42ef43f59325c
--- /dev/null
+++ b/test/Object/Inputs/macho-bad-archive1.a
Binary files differ
diff --git a/test/Object/Inputs/macho-bad-archive2.a b/test/Object/Inputs/macho-bad-archive2.a
new file mode 100644
index 0000000000000..006822f00019f
--- /dev/null
+++ b/test/Object/Inputs/macho-bad-archive2.a
Binary files differ
diff --git a/test/Object/Inputs/macho-toc64-archive-x86_64.a b/test/Object/Inputs/macho-toc64-archive-x86_64.a
new file mode 100644
index 0000000000000..e73f3dce6bd4b
--- /dev/null
+++ b/test/Object/Inputs/macho-toc64-archive-x86_64.a
Binary files differ
diff --git a/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll b/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
new file mode 100644
index 0000000000000..654d5b6a5585f
--- /dev/null
+++ b/test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
@@ -0,0 +1,81 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+; Check that urem is not hoisted.
+; CHECK-LABEL: @main
+; CHECK: urem
+; CHECK: urem
+; CHECK: urem
+
+@g_x_s = global i32 -470211272, align 4
+@g_z_s = global i32 2007237709, align 4
+@g_x_u = global i32 282475249, align 4
+@g_z_u = global i32 984943658, align 4
+@g_m = global i32 16807, align 4
+@res = common global i32 0, align 4
+
+; Function Attrs:
+define i64 @func() #0 {
+entry:
+ ret i64 1
+}
+
+; Function Attrs:
+define i32 @main() {
+entry:
+ %0 = load volatile i32, i32* @g_x_s, align 4
+ %1 = load volatile i32, i32* @g_z_s, align 4
+ %2 = load volatile i32, i32* @g_x_u, align 4
+ %3 = load volatile i32, i32* @g_z_u, align 4
+ %4 = load volatile i32, i32* @g_m, align 4
+ %call = call i64 @func() #4
+ %conv = sext i32 %1 to i64
+ %cmp = icmp ne i64 %call, %conv
+ br i1 %cmp, label %if.end, label %lor.lhs.false
+
+lor.lhs.false:
+ %div = udiv i32 %4, %1
+ %rem = urem i32 %0, %div
+ %cmp2 = icmp eq i32 %rem, 0
+ br i1 %cmp2, label %if.end, label %if.then
+
+if.then:
+ br label %cleanup
+
+if.end:
+ %call4 = call i64 @func() #4
+ %conv5 = zext i32 %3 to i64
+ %cmp6 = icmp ne i64 %call4, %conv5
+ br i1 %cmp6, label %if.end14, label %lor.lhs.false8
+
+lor.lhs.false8:
+ %div9 = udiv i32 %4, %3
+ %rem10 = urem i32 %0, %div9
+ %cmp11 = icmp eq i32 %rem10, 0
+ br i1 %cmp11, label %if.end14, label %if.then13
+
+if.then13:
+ br label %cleanup
+
+if.end14:
+ %call15 = call i64 @func() #4
+ %cmp17 = icmp ne i64 %call15, %conv
+ br i1 %cmp17, label %if.end25, label %lor.lhs.false19
+
+lor.lhs.false19:
+ %div20 = udiv i32 %4, %1
+ %rem21 = urem i32 %0, %div20
+ %cmp22 = icmp eq i32 %rem21, 0
+ br i1 %cmp22, label %if.end25, label %if.then24
+
+if.then24:
+ br label %cleanup
+
+if.end25:
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ 0, %if.end25 ], [ 1, %if.then24 ], [ 1, %if.then13 ], [ 1, %if.then ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { minsize noinline nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/tools/dsymutil/Inputs/common.macho.x86_64.o b/test/tools/dsymutil/Inputs/common.macho.x86_64.o
new file mode 100644
index 0000000000000..491009bc866e2
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/common.macho.x86_64.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/thumb.o b/test/tools/dsymutil/Inputs/thumb.o
new file mode 100644
index 0000000000000..8bac2a1e484c4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/thumb.o
Binary files differ
diff --git a/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o b/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
new file mode 100644
index 0000000000000..ce159ad52f412
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
Binary files differ
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index 752cc8128248e..f4370842edb5e 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -465,7 +465,7 @@ TEST_F(ScalarEvolutionsTest, CommutativeExprOperandOrder) {
});
}
-TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
+TEST_F(ScalarEvolutionsTest, CompareSCEVComplexity) {
FunctionType *FTy =
FunctionType::get(Type::getVoidTy(Context), std::vector<Type *>(), false);
Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
@@ -532,5 +532,41 @@ TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
EXPECT_NE(nullptr, SE.getSCEV(Acc[0]));
}
+TEST_F(ScalarEvolutionsTest, CompareValueComplexity) {
+ IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(Context);
+ PointerType *IntPtrPtrTy = IntPtrTy->getPointerTo();
+
+ FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(Context), {IntPtrTy, IntPtrTy}, false);
+ Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+ BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
+
+ Value *X = &*F->arg_begin();
+ Value *Y = &*std::next(F->arg_begin());
+
+ const int ValueDepth = 10;
+ for (int i = 0; i < ValueDepth; i++) {
+ X = new LoadInst(new IntToPtrInst(X, IntPtrPtrTy, "", EntryBB), "",
+ /*isVolatile*/ false, EntryBB);
+ Y = new LoadInst(new IntToPtrInst(Y, IntPtrPtrTy, "", EntryBB), "",
+ /*isVolatile*/ false, EntryBB);
+ }
+
+ auto *MulA = BinaryOperator::CreateMul(X, Y, "", EntryBB);
+ auto *MulB = BinaryOperator::CreateMul(Y, X, "", EntryBB);
+ ReturnInst::Create(Context, nullptr, EntryBB);
+
+ // This test isn't checking for correctness. Today making A and B resolve to
+ // the same SCEV would require deeper searching in CompareValueComplexity,
+ // which will slow down compilation. However, this test can fail (with LLVM's
+ // behavior still being correct) if we ever have a smarter
+ // CompareValueComplexity that is both fast and more accurate.
+
+ ScalarEvolution SE = buildSE(*F);
+ auto *A = SE.getSCEV(MulA);
+ auto *B = SE.getSCEV(MulB);
+ EXPECT_NE(A, B);
+}
+
} // end anonymous namespace
} // end namespace llvm