author     Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
commit     71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree       5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Transforms/Utils
parent     31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp | 24
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 9
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 448
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp | 532
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 4
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 65
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 13
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 19
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp | 17
-rw-r--r--  lib/Transforms/Utils/Evaluator.cpp | 3
-rw-r--r--  lib/Transforms/Utils/FunctionComparator.cpp | 8
-rw-r--r--  lib/Transforms/Utils/FunctionImportUtils.cpp | 14
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp | 21
-rw-r--r--  lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp | 2
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 152
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 84
-rw-r--r--  lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 182
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 148
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 40
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 167
-rw-r--r--  lib/Transforms/Utils/LoopUnrollPeel.cpp | 98
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 52
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp | 25
-rw-r--r--  lib/Transforms/Utils/LowerMemIntrinsics.cpp | 231
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 8
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 7
-rw-r--r--  lib/Transforms/Utils/MemorySSA.cpp | 2305
-rw-r--r--  lib/Transforms/Utils/MetaRenamer.cpp | 17
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp | 23
-rw-r--r--  lib/Transforms/Utils/PredicateInfo.cpp | 782
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 93
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 27
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 171
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 42
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp | 22
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 399
-rw-r--r--  lib/Transforms/Utils/Utils.cpp | 3
-rw-r--r--  lib/Transforms/Utils/VNCoercion.cpp | 482
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 1
39 files changed, 3340 insertions, 3400 deletions
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 2e95926c0b3f..4c9746b8c691 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -102,6 +102,10 @@ FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminatorsLegacyPass();
}
+static bool shouldHaveDiscriminator(const Instruction *I) {
+ return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
+}
+
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
@@ -176,7 +180,13 @@ static bool addDiscriminators(Function &F) {
// discriminator for this instruction.
for (BasicBlock &B : F) {
for (auto &I : B.getInstList()) {
- if (isa<IntrinsicInst>(&I))
+ // Not all intrinsic calls should have a discriminator.
+ // We want to avoid a non-deterministic assignment of discriminators at
+ // different debug levels. We still allow discriminators on memory
+ // intrinsic calls because those can be early expanded by SROA into
+ // pairs of loads and stores, and the expanded load/store instructions
+ // should have a valid discriminator.
+ if (!shouldHaveDiscriminator(&I))
continue;
const DILocation *DIL = I.getDebugLoc();
if (!DIL)
@@ -190,8 +200,8 @@ static bool addDiscriminators(Function &F) {
// discriminator is needed to distinguish both instructions.
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
- unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f;
- I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator));
+ unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
+ I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " " << I
<< "\n");
@@ -207,6 +217,10 @@ static bool addDiscriminators(Function &F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
CallInst *Current = dyn_cast<CallInst>(&I);
+ // We bypass intrinsic calls for the following two reasons:
+ // 1) We want to avoid a non-deterministic assignment of
+ // discriminators.
+ // 2) We want to minimize the number of base discriminators used.
if (!Current || isa<IntrinsicInst>(&I))
continue;
@@ -216,8 +230,8 @@ static bool addDiscriminators(Function &F) {
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
- Current->setDebugLoc(
- CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f));
+ unsigned Discriminator = ++LDM[L];
+ Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
Changed = true;
}
}
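
The counter update visible in the first hunk can be read on its own: the first instruction that brings a source location into a new block bumps a per-location counter, and later instructions at that location reuse the current value, which setBaseDiscriminator then encodes instead of the old raw 7-bit masking. The standalone sketch below models only that counter update; it uses no LLVM types, and the names Inst, Location and assignBaseDiscriminators are illustrative, not part of this commit.

// Standalone model of the per-location counter seen above:
//   unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Location = std::pair<std::string, unsigned>; // (filename, line)

struct Inst {
  Location Loc;
  bool FirstAtLocInThisBlock; // models R.second (a new (block, location) pair)
  unsigned Discriminator;
};

static void assignBaseDiscriminators(std::vector<Inst> &Insts) {
  std::map<Location, unsigned> LDM; // last base discriminator per location
  for (Inst &I : Insts)
    I.Discriminator = I.FirstAtLocInThisBlock ? ++LDM[I.Loc] : LDM[I.Loc];
}

int main() {
  std::vector<Inst> Insts = {
      {{"a.c", 10}, true, 0},  // first block at a.c:10  -> 1
      {{"a.c", 10}, false, 0}, // same block, same line  -> 1
      {{"a.c", 10}, true, 0},  // second block at a.c:10 -> 2
  };
  assignBaseDiscriminators(Insts);
  for (const Inst &I : Insts)
    std::printf("%s:%u -> %u\n", I.Loc.first.c_str(), I.Loc.second,
                I.Discriminator);
}
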
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b90349d3cdad..22af21d55c01 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -438,7 +438,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
- BI->setDebugLoc(BB->getFirstNonPHI()->getDebugLoc());
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -646,9 +646,10 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
}
if (LI) {
- Loop *L = LI->getLoopFor(Head);
- L->addBasicBlockToLoop(ThenBlock, *LI);
- L->addBasicBlockToLoop(Tail, *LI);
+ if (Loop *L = LI->getLoopFor(Head)) {
+ L->addBasicBlockToLoop(ThenBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
}
return CheckTerm;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index e61b04fbdd57..6cd9f1614991 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -96,9 +96,9 @@ static bool setDoesNotAlias(Function &F, unsigned n) {
}
static bool setNonNull(Function &F, unsigned n) {
- assert((n != AttributeSet::ReturnIndex ||
- F.getReturnType()->isPointerTy()) &&
- "nonnull applies only to pointers");
+ assert(
+ (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) &&
+ "nonnull applies only to pointers");
if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
return false;
F.addAttribute(n, Attribute::NonNull);
@@ -107,255 +107,255 @@ static bool setNonNull(Function &F, unsigned n) {
}
bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
- LibFunc::Func TheLibFunc;
+ LibFunc TheLibFunc;
if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
return false;
bool Changed = false;
switch (TheLibFunc) {
- case LibFunc::strlen:
+ case LibFunc_strlen:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::strchr:
- case LibFunc::strrchr:
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
+ case LibFunc_strcpy:
+ case LibFunc_stpcpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ case LibFunc_strncpy:
+ case LibFunc_stpncpy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::strxfrm:
+ case LibFunc_strxfrm:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::strcmp: // 0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: // 0,1
- case LibFunc::strcoll: // 0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: //
+ case LibFunc_strcmp: // 0,1
+ case LibFunc_strspn: // 0,1
+ case LibFunc_strncmp: // 0,1
+ case LibFunc_strcspn: // 0,1
+ case LibFunc_strcoll: // 0,1
+ case LibFunc_strcasecmp: // 0,1
+ case LibFunc_strncasecmp: //
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
+ case LibFunc_strstr:
+ case LibFunc_strpbrk:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::scanf:
+ case LibFunc_scanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::strdup:
- case LibFunc::strndup:
+ case LibFunc_strdup:
+ case LibFunc_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::stat:
- case LibFunc::statvfs:
+ case LibFunc_stat:
+ case LibFunc_statvfs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::sscanf:
+ case LibFunc_sscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::sprintf:
+ case LibFunc_sprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::snprintf:
+ case LibFunc_snprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 3);
return Changed;
- case LibFunc::setitimer:
+ case LibFunc_setitimer:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::system:
+ case LibFunc_system:
// May throw; "system" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::malloc:
+ case LibFunc_malloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::memchr:
- case LibFunc::memrchr:
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::memcpy:
- case LibFunc::mempcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memccpy:
+ case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::memcpy_chk:
+ case LibFunc_memcpy_chk:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::memalign:
+ case LibFunc_memalign:
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::mkdir:
+ case LibFunc_mkdir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::mktime:
+ case LibFunc_mktime:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::realloc:
+ case LibFunc_realloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::read:
+ case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::rewind:
+ case LibFunc_rewind:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::rename:
+ case LibFunc_rename:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::readlink:
+ case LibFunc_readlink:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::write:
+ case LibFunc_write:
// May throw; "write" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::bcopy:
+ case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::bcmp:
+ case LibFunc_bcmp:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::bzero:
+ case LibFunc_bzero:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::calloc:
+ case LibFunc_calloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::chmod:
- case LibFunc::chown:
+ case LibFunc_chmod:
+ case LibFunc_chown:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
+ case LibFunc_ctermid:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atof:
+ case LibFunc_atoll:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::access:
+ case LibFunc_access:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::fopen:
+ case LibFunc_fopen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -363,150 +363,150 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fdopen:
+ case LibFunc_fdopen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
+ case LibFunc_feof:
+ case LibFunc_free:
+ case LibFunc_fseek:
+ case LibFunc_ftell:
+ case LibFunc_fgetc:
+ case LibFunc_fseeko:
+ case LibFunc_ftello:
+ case LibFunc_fileno:
+ case LibFunc_fflush:
+ case LibFunc_fclose:
+ case LibFunc_fsetpos:
+ case LibFunc_flockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_ftrylockfile:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::ferror:
+ case LibFunc_ferror:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F);
return Changed;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
+ case LibFunc_fputc:
+ case LibFunc_fstat:
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ case LibFunc_fstatvfs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::fgets:
+ case LibFunc_fgets:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 3);
return Changed;
- case LibFunc::fread:
+ case LibFunc_fread:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 4);
return Changed;
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 4);
// FIXME: readonly #1?
return Changed;
- case LibFunc::fputs:
+ case LibFunc_fputs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
+ case LibFunc_fscanf:
+ case LibFunc_fprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fgetpos:
+ case LibFunc_fgetpos:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
+ case LibFunc_getc:
+ case LibFunc_getlogin_r:
+ case LibFunc_getc_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::getenv:
+ case LibFunc_getenv:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::gets:
- case LibFunc::getchar:
+ case LibFunc_gets:
+ case LibFunc_getchar:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::getitimer:
+ case LibFunc_getitimer:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::getpwnam:
+ case LibFunc_getpwnam:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::ungetc:
+ case LibFunc_ungetc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::uname:
+ case LibFunc_uname:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::unlink:
+ case LibFunc_unlink:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::unsetenv:
+ case LibFunc_unsetenv:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::utime:
- case LibFunc::utimes:
+ case LibFunc_utime:
+ case LibFunc_utimes:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::putc:
+ case LibFunc_putc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
+ case LibFunc_puts:
+ case LibFunc_printf:
+ case LibFunc_perror:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::pread:
+ case LibFunc_pread:
// May throw; "pread" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::pwrite:
+ case LibFunc_pwrite:
// May throw; "pwrite" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::putchar:
+ case LibFunc_putchar:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::popen:
+ case LibFunc_popen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -514,132 +514,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::pclose:
+ case LibFunc_pclose:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::vscanf:
+ case LibFunc_vscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::vsscanf:
+ case LibFunc_vsscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::vfscanf:
+ case LibFunc_vfscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::valloc:
+ case LibFunc_valloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::vprintf:
+ case LibFunc_vprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::vsnprintf:
+ case LibFunc_vsnprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 3);
return Changed;
- case LibFunc::open:
+ case LibFunc_open:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::opendir:
+ case LibFunc_opendir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::tmpfile:
+ case LibFunc_tmpfile:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::times:
+ case LibFunc_times:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
+ case LibFunc_htonl:
+ case LibFunc_htons:
+ case LibFunc_ntohl:
+ case LibFunc_ntohs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAccessMemory(F);
return Changed;
- case LibFunc::lstat:
+ case LibFunc_lstat:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::lchown:
+ case LibFunc_lchown:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::qsort:
+ case LibFunc_qsort:
// May throw; places call through function pointer.
Changed |= setDoesNotCapture(F, 4);
return Changed;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
+ case LibFunc_dunder_strdup:
+ case LibFunc_dunder_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::dunder_strtok_r:
+ case LibFunc_dunder_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::under_IO_getc:
+ case LibFunc_under_IO_getc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::under_IO_putc:
+ case LibFunc_under_IO_putc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::dunder_isoc99_scanf:
+ case LibFunc_dunder_isoc99_scanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::dunder_isoc99_sscanf:
+ case LibFunc_dunder_isoc99_sscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fopen64:
+ case LibFunc_fopen64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -647,26 +647,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
+ case LibFunc_fseeko64:
+ case LibFunc_ftello64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::tmpfile64:
+ case LibFunc_tmpfile64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::open64:
+ case LibFunc_open64:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::gettimeofday:
+ case LibFunc_gettimeofday:
// Currently some platforms have the restrict keyword on the arguments to
// gettimeofday. To be conservative, do not add noalias to gettimeofday's
// arguments.
@@ -674,29 +674,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::Znwj: // new(unsigned int)
- case LibFunc::Znwm: // new(unsigned long)
- case LibFunc::Znaj: // new[](unsigned int)
- case LibFunc::Znam: // new[](unsigned long)
- case LibFunc::msvc_new_int: // new(unsigned int)
- case LibFunc::msvc_new_longlong: // new(unsigned long long)
- case LibFunc::msvc_new_array_int: // new[](unsigned int)
- case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+ case LibFunc_Znwj: // new(unsigned int)
+ case LibFunc_Znwm: // new(unsigned long)
+ case LibFunc_Znaj: // new[](unsigned int)
+ case LibFunc_Znam: // new[](unsigned long)
+ case LibFunc_msvc_new_int: // new(unsigned int)
+ case LibFunc_msvc_new_longlong: // new(unsigned long long)
+ case LibFunc_msvc_new_array_int: // new[](unsigned int)
+ case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
// Operator new always returns a nonnull noalias pointer
- Changed |= setNonNull(F, AttributeSet::ReturnIndex);
- Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+ Changed |= setNonNull(F, AttributeList::ReturnIndex);
+ Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex);
return Changed;
//TODO: add LibFunc entries for:
- //case LibFunc::memset_pattern4:
- //case LibFunc::memset_pattern8:
- case LibFunc::memset_pattern16:
+ //case LibFunc_memset_pattern4:
+ //case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
// int __nvvm_reflect(const char *)
- case LibFunc::nvvm_reflect:
+ case LibFunc_nvvm_reflect:
Changed |= setDoesNotAccessMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
@@ -717,13 +717,13 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strlen))
+ if (!TLI->has(LibFunc_strlen))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
- B.getInt8PtrTy(), nullptr);
+ B.getInt8PtrTy());
inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
@@ -734,14 +734,14 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strchr))
+ if (!TLI->has(LibFunc_strchr))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
Constant *StrChr =
- M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr);
+ M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty);
inferLibFuncAttributes(*M->getFunction("strchr"), *TLI);
CallInst *CI = B.CreateCall(
StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
@@ -752,14 +752,14 @@ Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strncmp))
+ if (!TLI->has(LibFunc_strncmp))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(),
B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI);
CallInst *CI = B.CreateCall(
StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp");
@@ -772,12 +772,12 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc::strcpy))
+ if (!TLI->has(LibFunc_strcpy))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr);
+ Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI =
B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
@@ -788,13 +788,13 @@ Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc::strncpy))
+ if (!TLI->has(LibFunc_strncpy))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
- Len->getType(), nullptr);
+ Len->getType());
inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI = B.CreateCall(
StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy");
@@ -806,18 +806,18 @@ Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memcpy_chk))
+ if (!TLI->has(LibFunc_memcpy_chk))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- AttributeSet AS;
- AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeList AS;
+ AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction(
- "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
Dst = castToCStr(Dst, B);
Src = castToCStr(Src, B);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
@@ -828,14 +828,14 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memchr))
+ if (!TLI->has(LibFunc_memchr))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt32Ty(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("memchr"), *TLI);
CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr");
@@ -847,14 +847,14 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memcmp))
+ if (!TLI->has(LibFunc_memcmp))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(),
B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI);
CallInst *CI = B.CreateCall(
MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp");
@@ -881,13 +881,13 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeSet &Attrs) {
+ const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(), nullptr);
+ Op->getType());
CallInst *CI = B.CreateCall(Callee, Op, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -897,13 +897,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
}
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeSet &Attrs) {
+ IRBuilder<> &B, const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
- Op2->getType(), nullptr);
+ Op2->getType());
CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -914,12 +914,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::putchar))
+ if (!TLI->has(LibFunc_putchar))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
- B.getInt32Ty(), nullptr);
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty());
+ inferLibFuncAttributes(*M->getFunction("putchar"), *TLI);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
B.getInt32Ty(),
@@ -934,12 +934,12 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::puts))
+ if (!TLI->has(LibFunc_puts))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Value *PutS =
- M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr);
+ M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy());
inferLibFuncAttributes(*M->getFunction("puts"), *TLI);
CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
@@ -949,12 +949,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fputc))
+ if (!TLI->has(LibFunc_fputc))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(),
- File->getType(), nullptr);
+ File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction("fputc"), *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
@@ -968,13 +968,13 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fputs))
+ if (!TLI->has(LibFunc_fputs))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutsName = TLI->getName(LibFunc::fputs);
+ StringRef FPutsName = TLI->getName(LibFunc_fputs);
Constant *F = M->getOrInsertFunction(
- FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr);
+ FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs");
@@ -986,16 +986,16 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fwrite))
+ if (!TLI->has(LibFunc_fwrite))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FWriteName = TLI->getName(LibFunc::fwrite);
+ StringRef FWriteName = TLI->getName(LibFunc_fwrite);
Constant *F = M->getOrInsertFunction(
FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(),
- nullptr);
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI);
CallInst *CI =
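
Outside the inferLibFuncAttributes table, the BuildLibCalls changes are mechanical: LibFunc::x becomes LibFunc_x, AttributeSet becomes AttributeList, and Module::getOrInsertFunction no longer takes a trailing nullptr sentinel after the parameter types. A hedged usage sketch of the updated emitStrLen helper follows; the wrapper name lowerToStrlenCall and its surrounding context are assumptions, while the helper signature and the LibFunc_strlen gating come from the hunks above.

// Sketch of a caller of the emitStrLen helper shown above; the wrapper itself
// is hypothetical and not part of this patch.
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

// Emits a strlen(CStrPtr) call right before InsertPt, or returns nullptr when
// the target library does not provide strlen (emitStrLen re-checks
// TLI->has(LibFunc_strlen) internally and bails out in that case).
static Value *lowerToStrlenCall(Value *CStrPtr, Instruction *InsertPt,
                                const TargetLibraryInfo *TLI) {
  IRBuilder<> B(InsertPt);
  const DataLayout &DL = InsertPt->getModule()->getDataLayout();
  return emitStrLen(CStrPtr, B, DL, TLI);
}
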
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index bc2cef26edcb..1cfe3bd53648 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -17,6 +17,8 @@
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -36,12 +38,21 @@ namespace {
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
};
- struct DivPhiNodes {
- PHINode *Quotient;
- PHINode *Remainder;
+ struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
- DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
- : Quotient(InQuotient), Remainder(InRemainder) {}
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+
+ /// A quotient and remainder, plus a BB from which they logically "originate".
+ /// If you use Quotient or Remainder in a Phi node, you should use BB as its
+ /// corresponding predecessor.
+ struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
};
}
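
The rest of this file's diff rebuilds the pass around a FastDivInsertionTask that emits a fast (truncated) and a slow (full-width) div/rem block and merges them with phi nodes. As a reference for what the emitted IR computes, here is a source-level sketch assuming a 64-bit unsigned udiv/urem bypassed to a 32-bit type; it is a sketch for orientation, not code from this commit.

// Source-level equivalent of the control flow built by
// insertOperandRuntimeCheck / createFastBB / createSlowBB below, assuming a
// 64-bit unsigned division with a 32-bit bypass width.
#include <cstdint>

struct QuotRem {
  uint64_t Quot;
  uint64_t Rem;
};

static QuotRem bypassUDiv64To32(uint64_t Dividend, uint64_t Divisor) {
  // insertOperandRuntimeCheck: OR the operands together and test the bits
  // that do not fit into the bypass type (~BypassType->getBitMask()).
  if (((Dividend | Divisor) & ~0xffffffffULL) == 0) {
    // createFastBB: truncate, divide at the narrow width, zero-extend back.
    uint32_t D = static_cast<uint32_t>(Dividend);
    uint32_t V = static_cast<uint32_t>(Divisor);
    return {static_cast<uint64_t>(D / V), static_cast<uint64_t>(D % V)};
  }
  // createSlowBB: the original full-width division and remainder.
  return {Dividend / Divisor, Dividend % Divisor};
}

In the IR version, createDivRemPhiNodes merges the two arms with phi nodes, and the results are cached per (sign, dividend, divisor) key so repeated divisions in the same block reuse the same phis.
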
@@ -69,159 +80,376 @@ namespace llvm {
}
};
- typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+ typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
+ typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
+ typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
}
-// insertFastDiv - Substitutes the div/rem instruction with code that checks the
-// value of the operands and uses a shorter-faster div/rem instruction when
-// possible and the longer-slower div/rem instruction otherwise.
-static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- Function *F = I->getParent()->getParent();
- // Get instruction operands
- Value *Dividend = I->getOperand(0);
- Value *Divisor = I->getOperand(1);
+namespace {
+enum ValueRange {
+ /// Operand definitely fits into BypassType. No runtime checks are needed.
+ VALRNG_KNOWN_SHORT,
+ /// A runtime check is required, as value range is unknown.
+ VALRNG_UNKNOWN,
+ /// Operand is unlikely to fit into BypassType. The bypassing should be
+ /// disabled.
+ VALRNG_LIKELY_LONG
+};
+
+class FastDivInsertionTask {
+ bool IsValidTask = false;
+ Instruction *SlowDivOrRem = nullptr;
+ IntegerType *BypassType = nullptr;
+ BasicBlock *MainBB = nullptr;
+
+ bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
+ ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
+ QuotRemWithBB createSlowBB(BasicBlock *Successor);
+ QuotRemWithBB createFastBB(BasicBlock *Successor);
+ QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
+ BasicBlock *PhiBB);
+ Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
+ Optional<QuotRemPair> insertFastDivAndRem();
+
+ bool isSignedOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::SRem;
+ }
+ bool isDivisionOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::UDiv;
+ }
+ Type *getSlowType() { return SlowDivOrRem->getType(); }
+
+public:
+ FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+ Value *getReplacement(DivCacheTy &Cache);
+};
+} // anonymous namespace
+
+FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
+ const BypassWidthsTy &BypassWidths) {
+ switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ SlowDivOrRem = I;
+ break;
+ default:
+ // I is not a div/rem operation.
+ return;
+ }
- if (isa<ConstantInt>(Divisor)) {
- // Division by a constant should have been been solved and replaced earlier
- // in the pipeline.
- return false;
+ // Skip division on vector types. Only optimize integer instructions.
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
+ if (!SlowType)
+ return;
+
+ // Skip if this bitwidth is not bypassed.
+ auto BI = BypassWidths.find(SlowType->getBitWidth());
+ if (BI == BypassWidths.end())
+ return;
+
+ // Get type for div/rem instruction with bypass bitwidth.
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
+ BypassType = BT;
+
+ // The original basic block.
+ MainBB = I->getParent();
+
+ // The instruction is indeed a slow div or rem operation.
+ IsValidTask = true;
+}
+
+/// Reuses previously-computed dividend or remainder from the current BB if
+/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
+/// perform the optimization and caches the resulting dividend and remainder.
+/// If no replacement can be generated, nullptr is returned.
+Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
+ // First, make sure that the task is valid.
+ if (!IsValidTask)
+ return nullptr;
+
+ // Then, look for a value in Cache.
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+ auto CacheI = Cache.find(Key);
+
+ if (CacheI == Cache.end()) {
+ // If previous instance does not exist, try to insert fast div.
+ Optional<QuotRemPair> OptResult = insertFastDivAndRem();
+ // Bail out if insertFastDivAndRem has failed.
+ if (!OptResult)
+ return nullptr;
+ CacheI = Cache.insert({Key, *OptResult}).first;
}
- // If the numerator is a constant, bail if it doesn't fit into BypassType.
- if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
- if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
+ QuotRemPair &Value = CacheI->second;
+ return isDivisionOp() ? Value.Quotient : Value.Remainder;
+}
+
+/// \brief Check if a value looks like a hash.
+///
+/// The routine is expected to detect values computed using the most common hash
+/// algorithms. Typically, hash computations end with one of the following
+/// instructions:
+///
+/// 1) MUL with a constant wider than BypassType
+/// 2) XOR instruction
+///
+/// And even if we are wrong and the value is not a hash, it is still quite
+/// unlikely that such values will fit into BypassType.
+///
+/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
+/// It is implemented as a depth-first search for values that look neither long
+/// nor hash-like.
+bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Xor:
+ return true;
+ case Instruction::Mul: {
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ Value *Op1 = I->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(Op1);
+ if (!C && isa<BitCastInst>(Op1))
+ C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
+ return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+ }
+ case Instruction::PHI: {
+ // Stop IR traversal in case of a crazy input code. This limits recursion
+ // depth.
+ if (Visited.size() >= 16)
return false;
+ // Do not visit nodes that have been visited already. We return true because
+ // it means that we couldn't find any value that doesn't look hash-like.
+ if (Visited.find(I) != Visited.end())
+ return true;
+ Visited.insert(I);
+ return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
+ // Ignore undef values as they probably don't affect the division
+ // operands.
+ return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
+ isa<UndefValue>(V);
+ });
+ }
+ default:
+ return false;
+ }
+}
+
+/// Check if an integer value fits into our bypass type.
+ValueRange FastDivInsertionTask::getValueRange(Value *V,
+ VisitedSetTy &Visited) {
+ unsigned ShortLen = BypassType->getBitWidth();
+ unsigned LongLen = V->getType()->getIntegerBitWidth();
+
+ assert(LongLen > ShortLen && "Value type must be wider than BypassType");
+ unsigned HiBits = LongLen - ShortLen;
+
+ const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
+ APInt Zeros(LongLen, 0), Ones(LongLen, 0);
- // Basic Block is split before divide
- BasicBlock *MainBB = &*I->getParent();
- BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
-
- // Add new basic block for slow divide operation
- BasicBlock *SlowBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- SlowBB->moveBefore(SuccessorBB);
- IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
- Value *SlowQuotientV;
- Value *SlowRemainderV;
- if (UseSignedOp) {
- SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ computeKnownBits(V, Zeros, Ones, DL);
+
+ if (Zeros.countLeadingOnes() >= HiBits)
+ return VALRNG_KNOWN_SHORT;
+
+ if (Ones.countLeadingZeros() < HiBits)
+ return VALRNG_LIKELY_LONG;
+
+ // Long integer divisions are often used in hashtable implementations. It's
+ // not worth bypassing such divisions because hash values are extremely
+ // unlikely to have enough leading zeros. The call below tries to detect
+ // values that are unlikely to fit BypassType (including hashes).
+ if (isHashLikeValue(V, Visited))
+ return VALRNG_LIKELY_LONG;
+
+ return VALRNG_UNKNOWN;
+}
+
+/// Add new basic block for slow div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isSignedOp()) {
+ DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
} else {
- SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
}
- SlowBuilder.CreateBr(SuccessorBB);
-
- // Add new basic block for fast divide operation
- BasicBlock *FastBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- FastBB->moveBefore(SlowBB);
- IRBuilder<> FastBuilder(FastBB, FastBB->begin());
- Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
- BypassType);
- Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
- BypassType);
-
- // udiv/urem because optimization only handles positive numbers
- Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
- Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
- ShortDivisorV);
- Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortQuotientV,
- Dividend->getType());
- Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortRemainderV,
- Dividend->getType());
- FastBuilder.CreateBr(SuccessorBB);
-
- // Phi nodes for result of div and rem
- IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
- PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- QuoPhi->addIncoming(SlowQuotientV, SlowBB);
- QuoPhi->addIncoming(FastQuotientV, FastBB);
- PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- RemPhi->addIncoming(SlowRemainderV, SlowBB);
- RemPhi->addIncoming(FastRemainderV, FastBB);
-
- // Replace I with appropriate phi node
- if (UseDivOp)
- I->replaceAllUsesWith(QuoPhi);
- else
- I->replaceAllUsesWith(RemPhi);
- I->eraseFromParent();
- // Combine operands into a single value with OR for value testing below
- MainBB->getInstList().back().eraseFromParent();
- IRBuilder<> MainBuilder(MainBB, MainBB->end());
+ Builder.CreateBr(SuccessorBB);
+ return DivRemPair;
+}
+
+/// Add new basic block for fast div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ Value *ShortDivisorV =
+ Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+ Value *ShortDividendV =
+ Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+ // udiv/urem because this optimization only handles positive numbers.
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
+ Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
+ DivRemPair.Quotient =
+ Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
+ DivRemPair.Remainder =
+ Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
+ Builder.CreateBr(SuccessorBB);
+
+ return DivRemPair;
+}
- // We should have bailed out above if the divisor is a constant, but the
- // dividend may still be a constant. Set OrV to our non-constant operands
- // OR'ed together.
- assert(!isa<ConstantInt>(Divisor));
+/// Creates Phi nodes for result of Div and Rem.
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
+ QuotRemWithBB &RHS,
+ BasicBlock *PhiBB) {
+ IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
+ QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
+ QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
+ PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
+ RemPhi->addIncoming(LHS.Remainder, LHS.BB);
+ RemPhi->addIncoming(RHS.Remainder, RHS.BB);
+ return QuotRemPair(QuoPhi, RemPhi);
+}
+
+/// Creates a runtime check to test whether both the divisor and dividend fit
+/// into BypassType. The check is inserted at the end of MainBB. True return
+/// value means that the operands fit. Either of the operands may be NULL if it
+/// doesn't need a runtime check.
+Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
+ assert((Op1 || Op2) && "Nothing to check");
+ IRBuilder<> Builder(MainBB, MainBB->end());
Value *OrV;
- if (!isa<ConstantInt>(Dividend))
- OrV = MainBuilder.CreateOr(Dividend, Divisor);
+ if (Op1 && Op2)
+ OrV = Builder.CreateOr(Op1, Op2);
else
- OrV = Divisor;
+ OrV = Op1 ? Op1 : Op2;
// BitMask is inverted to check if the operands are
// larger than the bypass type
uint64_t BitMask = ~BypassType->getBitMask();
- Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
-
- // Compare operand values and branch
- Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
- Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
- MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
-
- // Cache phi nodes to be used later in place of other instances
- // of div or rem with the same sign, dividend, and divisor
- DivOpInfo Key(UseSignedOp, Dividend, Divisor);
- DivPhiNodes Value(QuoPhi, RemPhi);
- PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
- return true;
+ Value *AndV = Builder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values
+ Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
+ return Builder.CreateICmpEQ(AndV, ZeroV);
}
-// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
-// the current BB if operands and operation are identical. Otherwise calls
-// insertFastDiv to perform the optimization and caches the resulting dividend
-// and remainder.
-static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- // Get instruction operands
- DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
- DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
-
- if (CacheI == PerBBDivCache.end()) {
- // If previous instance does not exist, insert fast div
- return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
+/// Substitutes the div/rem instruction with code that checks the value of the
+/// operands and uses a shorter-faster div/rem instruction when possible.
+Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor)) {
+ // Keep division by a constant for DAGCombiner.
+ return None;
}
- // Replace operation value with previously generated phi node
- DivPhiNodes &Value = CacheI->second;
- if (UseDivOp) {
- // Replace all uses of div instruction with quotient phi node
- I->replaceAllUsesWith(Value.Quotient);
+ VisitedSetTy SetL;
+ ValueRange DividendRange = getValueRange(Dividend, SetL);
+ if (DividendRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ VisitedSetTy SetR;
+ ValueRange DivisorRange = getValueRange(Divisor, SetR);
+ if (DivisorRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
+ bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
+
+ if (DividendShort && DivisorShort) {
+ // If both operands are known to be short then just replace the long
+ // division with a short one in-place.
+
+ IRBuilder<> Builder(SlowDivOrRem);
+ Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
+ Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
+ Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
+ Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
+ Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
+ Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
+ return QuotRemPair(ExtDiv, ExtRem);
+ } else if (DividendShort && !isSignedOp()) {
+ // If the division is unsigned and Dividend is known to be short, then
+ // either
+ // 1) Divisor is less than or equal to Dividend, and the result can be computed
+ // with a short division.
+ // 2) Divisor is greater than Dividend. In this case, no division is needed
+ // at all: The quotient is 0 and the remainder is equal to Dividend.
+ //
+ // So instead of checking at runtime whether Divisor fits into BypassType,
+ // we emit a runtime check to differentiate between these two cases. This
+ // lets us entirely avoid a long div.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Long;
+ Long.BB = MainBB;
+ Long.Quotient = ConstantInt::get(getSlowType(), 0);
+ Long.Remainder = Dividend;
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
+ Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
+ return Result;
} else {
- // Replace all uses of rem instruction with remainder phi node
- I->replaceAllUsesWith(Value.Remainder);
+ // General case. Create both slow and fast div/rem pairs and choose one of
+ // them at runtime.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemWithBB Slow = createSlowBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
+ Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
+ DivisorShort ? nullptr : Divisor);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+ return Result;
}
-
- // Remove redundant operation
- I->eraseFromParent();
- return true;
}
-// bypassSlowDivision - This optimization identifies DIV instructions in a BB
-// that can be profitably bypassed and carried out with a shorter, faster
-// divide.
-bool llvm::bypassSlowDivision(
- BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
- DivCacheTy DivCache;
+/// This optimization identifies DIV/REM instructions in a BB that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(BasicBlock *BB,
+ const BypassWidthsTy &BypassWidths) {
+ DivCacheTy PerBBDivCache;
bool MadeChange = false;
Instruction* Next = &*BB->begin();
@@ -231,42 +459,20 @@ bool llvm::bypassSlowDivision(
Instruction* I = Next;
Next = Next->getNextNode();
- // Get instruction details
- unsigned Opcode = I->getOpcode();
- bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
- bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
- bool UseSignedOp = Opcode == Instruction::SDiv ||
- Opcode == Instruction::SRem;
-
- // Only optimize div or rem ops
- if (!UseDivOp && !UseRemOp)
- continue;
-
- // Skip division on vector types, only optimize integer instructions
- if (!I->getType()->isIntegerTy())
- continue;
-
- // Get bitwidth of div/rem instruction
- IntegerType *T = cast<IntegerType>(I->getType());
- unsigned int bitwidth = T->getBitWidth();
-
- // Continue if bitwidth is not bypassed
- DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
- if (BI == BypassWidths.end())
- continue;
-
- // Get type for div/rem instruction with bypass bitwidth
- IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
-
- MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
+ FastDivInsertionTask Task(I, BypassWidths);
+ if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ MadeChange = true;
+ }
}
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
// create divrem machine instructions. Now erase any unused divs / rems so we
// don't leave extra instructions sitting around.
- for (auto &KV : DivCache)
- for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
- RecursivelyDeleteTriviallyDeadInstructions(Phi);
+ for (auto &KV : PerBBDivCache)
+ for (Value *V : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(V);
return MadeChange;
}
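
To make the intent of the rewritten BypassSlowDivision concrete: for the general case, the IR produced by insertFastDivAndRem() behaves roughly like the following standalone C++ sketch (illustrative only; the helper name and the 64/32-bit widths are assumptions, not part of the patch):

#include <cstdint>

// Illustrative only: bypassing a 64-bit unsigned division with a 32-bit one
// when both operands happen to fit into the narrow "bypass" type.
static uint64_t bypassedUDiv64(uint64_t Dividend, uint64_t Divisor) {
  // Runtime check, mirroring insertOperandRuntimeCheck(): both operands fit
  // into 32 bits exactly when their OR has no bits above the bypass width.
  if (((Dividend | Divisor) >> 32) == 0) {
    // Fast path: short division on the truncated operands, zero-extended back.
    return (uint64_t)((uint32_t)Dividend / (uint32_t)Divisor);
  }
  // Slow path: the original long division.
  return Dividend / Divisor;
}

The pass emits both paths as separate basic blocks and joins the results with phi nodes, so the backend can still form a combined divrem instruction.
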
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 69889ec72f90..7a21c03da221 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -31,12 +31,13 @@ add_llvm_library(LLVMTransformUtils
LoopUtils.cpp
LoopVersioning.cpp
LowerInvoke.cpp
+ LowerMemIntrinsics.cpp
LowerSwitch.cpp
Mem2Reg.cpp
- MemorySSA.cpp
MetaRenamer.cpp
ModuleUtils.cpp
NameAnonGlobals.cpp
+ PredicateInfo.cpp
PromoteMemoryToRegister.cpp
StripGCRelocates.cpp
SSAUpdater.cpp
@@ -51,6 +52,7 @@ add_llvm_library(LLVMTransformUtils
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
+ VNCoercion.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 4d33e22fecfb..385c12302e04 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -90,9 +90,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
- // Copy all attributes other than those stored in the AttributeSet. We need
- // to remap the parameter indices of the AttributeSet.
- AttributeSet NewAttrs = NewFunc->getAttributes();
+ // Copy all attributes other than those stored in the AttributeList. We need
+ // to remap the parameter indices of the AttributeList.
+ AttributeList NewAttrs = NewFunc->getAttributes();
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
@@ -103,22 +103,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
- AttributeSet OldAttrs = OldFunc->getAttributes();
+ SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
+ AttributeList OldAttrs = OldFunc->getAttributes();
+
// Clone any argument attributes that are present in the VMap.
- for (const Argument &OldArg : OldFunc->args())
+ for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
- AttributeSet attrs =
- OldAttrs.getParamAttributes(OldArg.getArgNo() + 1);
- if (attrs.getNumSlots() > 0)
- NewArg->addAttr(attrs);
+ NewArgAttrs[NewArg->getArgNo()] =
+ OldAttrs.getParamAttributes(OldArg.getArgNo());
}
+ }
NewFunc->setAttributes(
- NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex,
- OldAttrs.getRetAttributes())
- .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
- OldAttrs.getFnAttributes()));
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
+ OldAttrs.getRetAttributes(), NewArgAttrs));
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
@@ -353,7 +351,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
- SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
@@ -747,3 +745,40 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
return NewLoop;
}
+
+/// \brief Duplicate non-Phi instructions from the beginning of the block up to
+/// the StopAt instruction into a split block between BB and its predecessor.
+BasicBlock *
+llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
+ Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping) {
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
+ NewBB->setName(PredBB->getName() + ".split");
+ Instruction *NewTerm = NewBB->getTerminator();
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; StopAt != &*BI; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ New->insertBefore(NewTerm);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ auto I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return NewBB;
+}
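
The operand-remapping loop in DuplicateInstructionsInSplitBetween() follows the usual clone-and-remap pattern. A toy sketch of that pattern, using a hypothetical Inst type rather than LLVM IR (illustrative only):

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Toy model of clone-and-remap: instructions are cloned in order, each clone
// is recorded in a value map, and operands that name an already-cloned
// instruction are redirected to the clone.
struct Inst {
  std::string Name;
  std::vector<Inst *> Operands;
};

static std::vector<std::unique_ptr<Inst>>
cloneAndRemap(const std::vector<Inst *> &Source) {
  std::unordered_map<Inst *, Inst *> ValueMapping;
  std::vector<std::unique_ptr<Inst>> Clones;
  for (Inst *I : Source) {
    auto New = std::make_unique<Inst>(*I);  // clone the instruction
    ValueMapping[I] = New.get();            // remember original -> clone
    for (Inst *&Op : New->Operands) {       // patch up intra-block references
      auto It = ValueMapping.find(Op);
      if (It != ValueMapping.end())
        Op = It->second;
    }
    Clones.push_back(std::move(New));
  }
  return Clones;
}
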
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 7ebeb615d248..4e9d67252d6c 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -20,6 +20,15 @@
#include "llvm-c/Core.h"
using namespace llvm;
+static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
+ const Comdat *SC = Src->getComdat();
+ if (!SC)
+ return;
+ Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SC->getSelectionKind());
+ Dst->setComdat(DC);
+}
+
/// This is not as easy as it might seem because we have to worry about making
/// copies of global variables and functions, and making their (initializers and
/// references, respectively) refer to the right globals.
@@ -124,6 +133,8 @@ std::unique_ptr<Module> llvm::CloneModule(
I->getAllMetadata(MDs);
for (auto MD : MDs)
GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
+
+ copyComdat(GV, &*I);
}
// Similarly, copy over function bodies now...
@@ -153,6 +164,8 @@ std::unique_ptr<Module> llvm::CloneModule(
if (I.hasPersonalityFn())
F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
+
+ copyComdat(F, &I);
}
// And aliases
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index c514c9c9cd4a..644d93b727b3 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -362,9 +362,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// "target-features" attribute allowing it to be lowered.
// FIXME: This should be changed to check to see if a specific
// attribute can not be inherited.
- AttributeSet OldFnAttrs = oldFunction->getAttributes().getFnAttributes();
- AttrBuilder AB(OldFnAttrs, AttributeSet::FunctionIndex);
- for (auto Attr : AB.td_attrs())
+ AttrBuilder AB(oldFunction->getAttributes().getFnAttributes());
+ for (const auto &Attr : AB.td_attrs())
newFunction->addFnAttr(Attr.first, Attr.second);
newFunction->getBasicBlockList().push_back(newRootNode);
@@ -440,8 +439,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Emit a call to the new function, passing in: *pointer to struct (if
 // aggregating parameters), or plain inputs and allocated memory for outputs
std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
-
- LLVMContext &Context = newFunction->getContext();
+
+ Module *M = newFunction->getParent();
+ LLVMContext &Context = M->getContext();
+ const DataLayout &DL = M->getDataLayout();
// Add inputs as params, or to be filled into the struct
for (Value *input : inputs)
@@ -456,8 +457,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(output);
} else {
AllocaInst *alloca =
- new AllocaInst(output->getType(), nullptr, output->getName() + ".loc",
- &codeReplacer->getParent()->front().front());
+ new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
+ nullptr, output->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
@@ -473,7 +475,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct = new AllocaInst(StructArgTy, nullptr, "structArg",
+ Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
+ "structArg",
&codeReplacer->getParent()->front().front());
params.push_back(Struct);
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 75a1dde57c4c..0eee6e19efac 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -28,15 +28,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
return nullptr;
}
+ Function *F = I.getParent()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), nullptr,
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
I.getName()+".reg2mem", AllocaPoint);
} else {
- Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem",
- &F->getEntryBlock().front());
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".reg2mem", &F->getEntryBlock().front());
}
// We cannot demote invoke instructions to the stack if their normal edge
@@ -110,14 +112,17 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
return nullptr;
}
+ const DataLayout &DL = P->getModule()->getDataLayout();
+
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), nullptr,
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem",
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName() + ".reg2mem",
&F->getEntryBlock().front());
}
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 4adf1754253d..59f176e2f231 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalVariable.h"
@@ -486,7 +487,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
ConstantInt *Val =
dyn_cast<ConstantInt>(getVal(SI->getCondition()));
if (!Val) return false; // Cannot determine.
- NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+ NextBB = SI->findCaseValue(Val)->getCaseSuccessor();
} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 81a7c4ceffab..73a0b2737e95 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -74,14 +74,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
return L.compare(R);
}
-int FunctionComparator::cmpAttrs(const AttributeSet L,
- const AttributeSet R) const {
+int FunctionComparator::cmpAttrs(const AttributeList L,
+ const AttributeList R) const {
if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
return Res;
for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
- AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
- RE = R.end(i);
+ AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
for (; LI != LE && RI != RE; ++LI, ++RI) {
Attribute LA = *LI;
Attribute RA = *RI;
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index 9844190ef84a..b00f4b14068a 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -21,11 +21,11 @@ using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
/// declaration.
bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) {
+ const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
// For alias, we tie the definition to the base object. Extract it and recurse
if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
- if (GA->hasWeakAnyLinkage())
+ if (GA->isInterposable())
return false;
const GlobalObject *GO = GA->getBaseObject();
if (!GO->hasLinkOnceODRLinkage())
@@ -34,7 +34,7 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
GO, GlobalsToImport);
}
// Only import the globals requested for importing.
- if (GlobalsToImport->count(SGV))
+ if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
return true;
// Otherwise no.
return false;
@@ -57,7 +57,8 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
return false;
if (isPerformingImport()) {
- assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+ assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
+ !isNonRenamableLocal(*SGV)) &&
"Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
@@ -254,9 +255,8 @@ bool FunctionImportGlobalProcessing::run() {
return false;
}
-bool llvm::renameModuleForThinLTO(
- Module &M, const ModuleSummaryIndex &Index,
- DenseSet<const GlobalValue *> *GlobalsToImport) {
+bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
+ SetVector<GlobalValue *> *GlobalsToImport) {
FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
return ThinLTOProcessing.run();
}
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 74ebcda8355c..ba4b78ac758a 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -10,9 +10,22 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
@@ -175,13 +188,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return false;
}
+GlobalStatus::GlobalStatus() = default;
+
bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
SmallPtrSet<const PHINode *, 16> PhiUsers;
return analyzeGlobalAux(V, GS, PhiUsers);
}
-
-GlobalStatus::GlobalStatus()
- : IsCompared(false), IsLoaded(false), StoredType(NotStored),
- StoredOnceValue(nullptr), AccessingFunction(nullptr),
- HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
- Ordering(AtomicOrdering::NotAtomic) {}
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index ed018bb73107..b8c12ad5ea84 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -62,6 +62,8 @@ void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
ModuleName = M.getName();
for (const auto &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
AllFunctions++;
ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index a40079ca8e76..5d6fbc3325ff 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -20,10 +20,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
@@ -40,8 +42,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -1107,26 +1109,23 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
bool DTCalculated = false;
Function *CalledFunc = CS.getCalledFunction();
- for (Function::arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end();
- I != E; ++I) {
- unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
- if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
+ for (Argument &Arg : CalledFunc->args()) {
+ unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
+ if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
if (!DTCalculated) {
- DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent()
- ->getParent()));
+ DT.recalculate(*CS.getCaller());
DTCalculated = true;
}
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
- Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align)
+ Value *ArgVal = CS.getArgument(Arg.getArgNo());
+ if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
continue;
- CallInst *NewAssumption = IRBuilder<>(CS.getInstruction())
- .CreateAlignmentAssumption(DL, Arg, Align);
- AC->registerAssumption(NewAssumption);
+ CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, ArgVal, Align);
+ AC->registerAssumption(NewAsmp);
}
}
}
@@ -1140,7 +1139,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
- const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Caller = CS.getCaller();
const Function *Callee = CS.getCalledFunction();
CallGraphNode *CalleeNode = CG[Callee];
CallGraphNode *CallerNode = CG[Caller];
@@ -1225,7 +1224,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
PointerType *ArgTy = cast<PointerType>(Arg->getType());
Type *AggTy = ArgTy->getElementType();
- Function *Caller = TheCall->getParent()->getParent();
+ Function *Caller = TheCall->getFunction();
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -1239,31 +1239,30 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
AssumptionCache *AC =
IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
- const DataLayout &DL = Caller->getParent()->getDataLayout();
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
ByValAlignment)
return Arg;
-
+
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align =
- Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy);
+ unsigned Align = DL.getPrefTypeAlignment(AggTy);
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
Align = std::max(Align, ByValAlignment);
-
- Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
+
+ Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
-
+
// Uses of the argument in the function should use our new alloca
// instead.
return NewAlloca;
@@ -1393,6 +1392,89 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
}
+/// Update the block frequencies of the caller after a callee has been inlined.
+///
+/// Each block cloned into the caller has its block frequency scaled by the
+/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
+/// callee's entry block gets the same frequency as the callsite block and the
+/// relative frequencies of all cloned blocks remain the same after cloning.
+static void updateCallerBFI(BasicBlock *CallSiteBlock,
+ const ValueToValueMapTy &VMap,
+ BlockFrequencyInfo *CallerBFI,
+ BlockFrequencyInfo *CalleeBFI,
+ const BasicBlock &CalleeEntryBlock) {
+ SmallPtrSet<BasicBlock *, 16> ClonedBBs;
+ for (auto const &Entry : VMap) {
+ if (!isa<BasicBlock>(Entry.first) || !Entry.second)
+ continue;
+ auto *OrigBB = cast<BasicBlock>(Entry.first);
+ auto *ClonedBB = cast<BasicBlock>(Entry.second);
+ uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ if (!ClonedBBs.insert(ClonedBB).second) {
+ // Multiple blocks in the callee might get mapped to one cloned block in
+ // the caller since we prune the callee as we clone it. When that happens,
+ // we want to use the maximum among the original blocks' frequencies.
+ uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ if (NewFreq > Freq)
+ Freq = NewFreq;
+ }
+ CallerBFI->setBlockFreq(ClonedBB, Freq);
+ }
+ BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
+ CallerBFI->setBlockFreqAndScale(
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
+ ClonedBBs);
+}
+
+/// Update the branch metadata for cloned call instructions.
+static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
+ const Optional<uint64_t> &CalleeEntryCount,
+ const Instruction *TheCall) {
+ if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
+ return;
+ Optional<uint64_t> CallSiteCount =
+ ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
+ uint64_t CallCount =
+ std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
+ CalleeEntryCount.getValue());
+
+ for (auto const &Entry : VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(CallCount, CalleeEntryCount.getValue());
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (VMap.count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
+ CalleeEntryCount.getValue());
+}
+
+/// Update the entry count of callee after inlining.
+///
+/// The callsite's block count is subtracted from the callee's function entry
+/// count.
+static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
+ Instruction *CallInst, Function *Callee) {
+ // If the callee has an original count of N, and the estimated count of
+ // callsite is M, the new callee count is set to N - M. M is estimated from
+ // the caller's entry count, its entry block frequency and the block frequency
+ // of the callsite.
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount.hasValue())
+ return;
+ Optional<uint64_t> CallCount =
+ ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
+ if (!CallCount.hasValue())
+ return;
+ // Since CallSiteCount is an estimate, it could exceed the original callee
+ // count and has to be set to 0.
+ if (CallCount.getValue() > CalleeCount.getValue())
+ Callee->setEntryCount(0);
+ else
+ Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
+}
/// This function inlines the called function into the basic block of the
/// caller. This returns false if it is not possible to inline this call.
@@ -1405,13 +1487,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AAResults *CalleeAAR, bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
- assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
- "Instruction not in function!");
+ assert(TheCall->getParent() && TheCall->getFunction()
+ && "Instruction not in function!");
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
-
- const Function *CalledFunc = CS.getCalledFunction();
+
+ Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
@@ -1548,7 +1630,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// matches up the formal to the actual argument values.
CallSite::arg_iterator AI = CS.arg_begin();
unsigned ArgNo = 0;
- for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ for (Function::arg_iterator I = CalledFunc->arg_begin(),
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
Value *ActualArg = *AI;
@@ -1578,10 +1660,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, TheCall);
-
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
+ // Update the profile count of callee.
+ updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);
+
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
@@ -2087,6 +2177,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->getName() + ".exit");
}
+ if (IFI.CallerBFI) {
+ // Copy original BB's block frequency to AfterCallBB
+ IFI.CallerBFI->setBlockFreq(
+ AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
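
The profile bookkeeping added to InlineFunction() reduces to simple count arithmetic. A minimal sketch of the updateCalleeCount() rule, with hypothetical names and plain integers standing in for LLVM's profile counts:

#include <cstdint>

// Illustrative arithmetic behind updateCalleeCount(): if the callee originally
// has entry count N and the callsite is estimated to execute M times, the
// callee keeps N - M after inlining, never going below zero.
static uint64_t remainingCalleeCount(uint64_t CalleeEntryCount,
                                     uint64_t EstimatedCallSiteCount) {
  if (EstimatedCallSiteCount > CalleeEntryCount)
    return 0; // The callsite count is an estimate and may exceed the real count.
  return CalleeEntryCount - EstimatedCallSiteCount;
}
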
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 68c6b74d5e5b..49b4bd92faf4 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -87,7 +87,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Instruction *I = Worklist.pop_back_val();
BasicBlock *InstBB = I->getParent();
Loop *L = LI.getLoopFor(InstBB);
- if (!LoopExitBlocks.count(L))
+ assert(L && "Instruction belongs to a BB that's not part of a loop");
+ if (!LoopExitBlocks.count(L))
L->getExitBlocks(LoopExitBlocks[L]);
assert(LoopExitBlocks.count(L));
const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
@@ -105,7 +106,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
+ if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(U);
if (InstBB != UserBB && !L->contains(UserBB))
@@ -123,7 +124,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// DomBB dominates the value, so adjust DomBB to the normal destination
// block, which is effectively where the value is first usable.
BasicBlock *DomBB = InstBB;
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
+ if (auto *Inv = dyn_cast<InvokeInst>(I))
DomBB = Inv->getNormalDest();
DomTreeNode *DomNode = DT.getNode(DomBB);
@@ -188,7 +189,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// block.
Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
+ if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UseToRewrite);
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
@@ -237,40 +238,75 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
return Changed;
}
-/// Return true if the specified block dominates at least
-/// one of the blocks in the specified list.
-static bool
-blockDominatesAnExit(BasicBlock *BB,
- DominatorTree &DT,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
- DomTreeNode *DomNode = DT.getNode(BB);
- return any_of(ExitBlocks, [&](BasicBlock *EB) {
- return DT.dominates(DomNode, DT.getNode(EB));
- });
+// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
+static void computeBlocksDominatingExits(
+ Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+ SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) {
+ SmallVector<BasicBlock *, 8> BBWorklist;
+
+ // We start from the exit blocks, as every block trivially dominates itself
+ // (not strictly).
+ for (BasicBlock *BB : ExitBlocks)
+ BBWorklist.push_back(BB);
+
+ while (!BBWorklist.empty()) {
+ BasicBlock *BB = BBWorklist.pop_back_val();
+
+ // Check if this is a loop header. If this is the case, we're done.
+ if (L.getHeader() == BB)
+ continue;
+
+ // Otherwise, add its immediate predecessor in the dominator tree to the
+ // worklist, unless we visited it already.
+ BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
+
+ // Exit blocks can have an immediate dominator not belonging to the
+ // loop. If an exit block is immediately dominated by a block outside
+ // the loop, then not all paths from that dominator to the exit block
+ // go through the loop.
+ // Example:
+ //
+ // |---- A
+ // | |
+ // | B<--
+ // | | |
+ // |---> C --
+ // |
+ // D
+ //
+ // C is the exit block of the loop and it's immediately dominated by A,
+ // which doesn't belong to the loop.
+ if (!L.contains(IDomBB))
+ continue;
+
+ if (BlocksDominatingExits.insert(IDomBB).second)
+ BBWorklist.push_back(IDomBB);
+ }
}
bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
- // Get the set of exiting blocks.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
-
if (ExitBlocks.empty())
return false;
+ SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits;
+
+ // We want to avoid use-scanning by leveraging dominance information:
+ // if a block doesn't dominate any of the loop exits, then none of the values
+ // defined in the loop can be used outside.
+ // We compute the set of blocks fulfilling this condition in advance by
+ // walking the dominator tree upwards until we hit a loop header.
+ computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
+
SmallVector<Instruction *, 8> Worklist;
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, put them into the worklist to rewrite those uses.
- for (BasicBlock *BB : L.blocks()) {
- // For large loops, avoid use-scanning by using dominance information: In
- // particular, if a block does not dominate any of the loop exits, then none
- // of the values defined in the block could be used outside the loop.
- if (!blockDominatesAnExit(BB, DT, ExitBlocks))
- continue;
-
+ for (BasicBlock *BB : BlocksDominatingExits) {
for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
@@ -395,8 +431,8 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!formLCSSAOnAllLoops(&LI, DT, SE))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
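
The new computeBlocksDominatingExits() walk is an instance of a generic worklist climb up the dominator tree. A schematic version with a hypothetical Block type (not the LLVM data structures):

#include <unordered_set>
#include <vector>

// Schematic: starting from the loop exit blocks, repeatedly step to the
// immediate dominator; every block reached this way that is still inside the
// loop dominates at least one exit.
struct Block {
  Block *IDom = nullptr;  // immediate dominator
  bool InLoop = false;    // whether the block belongs to the loop
  bool IsHeader = false;  // whether the block is the loop header
};

static std::unordered_set<Block *>
blocksDominatingExits(const std::vector<Block *> &ExitBlocks) {
  std::unordered_set<Block *> Result;
  std::vector<Block *> Worklist(ExitBlocks.begin(), ExitBlocks.end());
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    if (BB->IsHeader)
      continue;                      // reached the loop header: stop climbing
    Block *IDom = BB->IDom;
    if (!IDom || !IDom->InLoop)
      continue;                      // the dominator lies outside the loop
    if (Result.insert(IDom).second)  // enqueue each dominator only once
      Worklist.push_back(IDom);
  }
  return Result;
}
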
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index d97cd7582eaa..fe93d6927c63 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -100,12 +100,12 @@ private:
bool perform(CallInst *CI);
void checkCandidate(CallInst &CI);
void shrinkWrapCI(CallInst *CI, Value *Cond);
- bool performCallDomainErrorOnly(CallInst *CI, const LibFunc::Func &Func);
- bool performCallErrors(CallInst *CI, const LibFunc::Func &Func);
- bool performCallRangeErrorOnly(CallInst *CI, const LibFunc::Func &Func);
- Value *generateOneRangeCond(CallInst *CI, const LibFunc::Func &Func);
- Value *generateTwoRangeCond(CallInst *CI, const LibFunc::Func &Func);
- Value *generateCondForPow(CallInst *CI, const LibFunc::Func &Func);
+ bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func);
+ bool performCallErrors(CallInst *CI, const LibFunc &Func);
+ bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func);
+ Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
// Create an OR of two conditions.
Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
@@ -141,44 +141,44 @@ private:
// Perform the transformation to calls with errno set by domain error.
bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::acos: // DomainError: (x < -1 || x > 1)
- case LibFunc::acosf: // Same as acos
- case LibFunc::acosl: // Same as acos
- case LibFunc::asin: // DomainError: (x < -1 || x > 1)
- case LibFunc::asinf: // Same as asin
- case LibFunc::asinl: // Same as asin
+ case LibFunc_acos: // DomainError: (x < -1 || x > 1)
+ case LibFunc_acosf: // Same as acos
+ case LibFunc_acosl: // Same as acos
+ case LibFunc_asin: // DomainError: (x < -1 || x > 1)
+ case LibFunc_asinf: // Same as asin
+ case LibFunc_asinl: // Same as asin
{
++NumWrappedTwoCond;
Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
break;
}
- case LibFunc::cos: // DomainError: (x == +inf || x == -inf)
- case LibFunc::cosf: // Same as cos
- case LibFunc::cosl: // Same as cos
- case LibFunc::sin: // DomainError: (x == +inf || x == -inf)
- case LibFunc::sinf: // Same as sin
- case LibFunc::sinl: // Same as sin
+ case LibFunc_cos: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_cosf: // Same as cos
+ case LibFunc_cosl: // Same as cos
+ case LibFunc_sin: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_sinf: // Same as sin
+ case LibFunc_sinl: // Same as sin
{
++NumWrappedTwoCond;
Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
-INFINITY);
break;
}
- case LibFunc::acosh: // DomainError: (x < 1)
- case LibFunc::acoshf: // Same as acosh
- case LibFunc::acoshl: // Same as acosh
+ case LibFunc_acosh: // DomainError: (x < 1)
+ case LibFunc_acoshf: // Same as acosh
+ case LibFunc_acoshl: // Same as acosh
{
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
break;
}
- case LibFunc::sqrt: // DomainError: (x < 0)
- case LibFunc::sqrtf: // Same as sqrt
- case LibFunc::sqrtl: // Same as sqrt
+ case LibFunc_sqrt: // DomainError: (x < 0)
+ case LibFunc_sqrtf: // Same as sqrt
+ case LibFunc_sqrtl: // Same as sqrt
{
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
@@ -193,31 +193,31 @@ bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
// Perform the transformation to calls with errno set by range error.
bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::cosh:
- case LibFunc::coshf:
- case LibFunc::coshl:
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- case LibFunc::exp10:
- case LibFunc::exp10f:
- case LibFunc::exp10l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- case LibFunc::sinh:
- case LibFunc::sinhf:
- case LibFunc::sinhl: {
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl: {
Cond = generateTwoRangeCond(CI, Func);
break;
}
- case LibFunc::expm1: // RangeError: (709, inf)
- case LibFunc::expm1f: // RangeError: (88, inf)
- case LibFunc::expm1l: // RangeError: (11356, inf)
+ case LibFunc_expm1: // RangeError: (709, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
{
Cond = generateOneRangeCond(CI, Func);
break;
@@ -231,15 +231,15 @@ bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
// Perform the transformation to calls with errno set by combination of errors.
bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::atanh: // DomainError: (x < -1 || x > 1)
+ case LibFunc_atanh: // DomainError: (x < -1 || x > 1)
// PoleError: (x == -1 || x == 1)
// Overall Cond: (x <= -1 || x >= 1)
- case LibFunc::atanhf: // Same as atanh
- case LibFunc::atanhl: // Same as atanh
+ case LibFunc_atanhf: // Same as atanh
+ case LibFunc_atanhl: // Same as atanh
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -247,20 +247,20 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
break;
}
- case LibFunc::log: // DomainError: (x < 0)
+ case LibFunc_log: // DomainError: (x < 0)
// PoleError: (x == 0)
// Overall Cond: (x <= 0)
- case LibFunc::logf: // Same as log
- case LibFunc::logl: // Same as log
- case LibFunc::log10: // Same as log
- case LibFunc::log10f: // Same as log
- case LibFunc::log10l: // Same as log
- case LibFunc::log2: // Same as log
- case LibFunc::log2f: // Same as log
- case LibFunc::log2l: // Same as log
- case LibFunc::logb: // Same as log
- case LibFunc::logbf: // Same as log
- case LibFunc::logbl: // Same as log
+ case LibFunc_logf: // Same as log
+ case LibFunc_logl: // Same as log
+ case LibFunc_log10: // Same as log
+ case LibFunc_log10f: // Same as log
+ case LibFunc_log10l: // Same as log
+ case LibFunc_log2: // Same as log
+ case LibFunc_log2f: // Same as log
+ case LibFunc_log2l: // Same as log
+ case LibFunc_logb: // Same as log
+ case LibFunc_logbf: // Same as log
+ case LibFunc_logbl: // Same as log
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -268,11 +268,11 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
break;
}
- case LibFunc::log1p: // DomainError: (x < -1)
+ case LibFunc_log1p: // DomainError: (x < -1)
// PoleError: (x == -1)
// Overall Cond: (x <= -1)
- case LibFunc::log1pf: // Same as log1p
- case LibFunc::log1pl: // Same as log1p
+ case LibFunc_log1pf: // Same as log1p
+ case LibFunc_log1pl: // Same as log1p
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -280,11 +280,11 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
break;
}
- case LibFunc::pow: // DomainError: x < 0 and y is noninteger
+ case LibFunc_pow: // DomainError: x < 0 and y is noninteger
// PoleError: x == 0 and y < 0
// RangeError: overflow or underflow
- case LibFunc::powf:
- case LibFunc::powl: {
+ case LibFunc_powf:
+ case LibFunc_powl: {
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError ||
!LibCallsShrinkWrapDoRangeError)
return false;
@@ -313,7 +313,7 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
if (!CI.use_empty())
return;
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI.getCalledFunction();
if (!Callee)
return;
@@ -333,16 +333,16 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
// Generate the upper bound condition for RangeError.
Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
float UpperBound;
switch (Func) {
- case LibFunc::expm1: // RangeError: (709, inf)
+ case LibFunc_expm1: // RangeError: (709, inf)
UpperBound = 709.0f;
break;
- case LibFunc::expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
UpperBound = 88.0f;
break;
- case LibFunc::expm1l: // RangeError: (11356, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
UpperBound = 11356.0f;
break;
default:
@@ -355,57 +355,57 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
// Generate the lower and upper bound condition for RangeError.
Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
float UpperBound, LowerBound;
switch (Func) {
- case LibFunc::cosh: // RangeError: (x < -710 || x > 710)
- case LibFunc::sinh: // Same as cosh
+ case LibFunc_cosh: // RangeError: (x < -710 || x > 710)
+ case LibFunc_sinh: // Same as cosh
LowerBound = -710.0f;
UpperBound = 710.0f;
break;
- case LibFunc::coshf: // RangeError: (x < -89 || x > 89)
- case LibFunc::sinhf: // Same as coshf
+ case LibFunc_coshf: // RangeError: (x < -89 || x > 89)
+ case LibFunc_sinhf: // Same as coshf
LowerBound = -89.0f;
UpperBound = 89.0f;
break;
- case LibFunc::coshl: // RangeError: (x < -11357 || x > 11357)
- case LibFunc::sinhl: // Same as coshl
+ case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357)
+ case LibFunc_sinhl: // Same as coshl
LowerBound = -11357.0f;
UpperBound = 11357.0f;
break;
- case LibFunc::exp: // RangeError: (x < -745 || x > 709)
+ case LibFunc_exp: // RangeError: (x < -745 || x > 709)
LowerBound = -745.0f;
UpperBound = 709.0f;
break;
- case LibFunc::expf: // RangeError: (x < -103 || x > 88)
+ case LibFunc_expf: // RangeError: (x < -103 || x > 88)
LowerBound = -103.0f;
UpperBound = 88.0f;
break;
- case LibFunc::expl: // RangeError: (x < -11399 || x > 11356)
+ case LibFunc_expl: // RangeError: (x < -11399 || x > 11356)
LowerBound = -11399.0f;
UpperBound = 11356.0f;
break;
- case LibFunc::exp10: // RangeError: (x < -323 || x > 308)
+ case LibFunc_exp10: // RangeError: (x < -323 || x > 308)
LowerBound = -323.0f;
UpperBound = 308.0f;
break;
- case LibFunc::exp10f: // RangeError: (x < -45 || x > 38)
+ case LibFunc_exp10f: // RangeError: (x < -45 || x > 38)
LowerBound = -45.0f;
UpperBound = 38.0f;
break;
- case LibFunc::exp10l: // RangeError: (x < -4950 || x > 4932)
+ case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932)
LowerBound = -4950.0f;
UpperBound = 4932.0f;
break;
- case LibFunc::exp2: // RangeError: (x < -1074 || x > 1023)
+ case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023)
LowerBound = -1074.0f;
UpperBound = 1023.0f;
break;
- case LibFunc::exp2f: // RangeError: (x < -149 || x > 127)
+ case LibFunc_exp2f: // RangeError: (x < -149 || x > 127)
LowerBound = -149.0f;
UpperBound = 127.0f;
break;
- case LibFunc::exp2l: // RangeError: (x < -16445 || x > 11383)
+ case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383)
LowerBound = -16445.0f;
UpperBound = 11383.0f;
break;
@@ -434,9 +434,9 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
// (i.e. we might invoke the calls that will not set the errno.).
//
Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
- const LibFunc::Func &Func) {
- // FIXME: LibFunc::powf and powl TBD.
- if (Func != LibFunc::pow) {
+ const LibFunc &Func) {
+ // FIXME: LibFunc_powf and powl TBD.
+ if (Func != LibFunc_pow) {
DEBUG(dbgs() << "Not handled powf() and powl()\n");
return nullptr;
}
@@ -516,7 +516,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
// Perform the transformation to a single candidate.
bool LibCallsShrinkWrap::perform(CallInst *CI) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
assert(Callee && "perform() should apply to a non-empty callee");
TLI.getLibFunc(*Callee, Func);
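
Conceptually, the shrink-wrapping performed by this pass guards a libcall that is kept only for its errno side effect with the condition under which it can actually set errno. A hand-written before/after sketch for the acos domain-error case (illustrative C++, not the pass output):

#include <cmath>

// Before shrink-wrapping: the call is kept only because it may set errno.
static void setErrnoForAcos(float X) {
  (void)std::acos(X);
}

// After shrink-wrapping: acos reports a domain error only for |X| > 1, so the
// call is executed only when that condition holds and the common case skips
// the libcall entirely.
static void setErrnoForAcosWrapped(float X) {
  if (X < -1.0f || X > 1.0f)
    (void)std::acos(X);
}
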
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 6e4174aa0cda..18b29226c2ef 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -126,21 +126,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// If the default is unreachable, ignore it when searching for TheOnlyDest.
if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
SI->getNumCases() > 0) {
- TheOnlyDest = SI->case_begin().getCaseSuccessor();
+ TheOnlyDest = SI->case_begin()->getCaseSuccessor();
}
// Figure out which case it goes to.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
// Found case matching a constant operand?
- if (i.getCaseValue() == CI) {
- TheOnlyDest = i.getCaseSuccessor();
+ if (i->getCaseValue() == CI) {
+ TheOnlyDest = i->getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (i.getCaseSuccessor() == DefaultDest) {
+ if (i->getCaseSuccessor() == DefaultDest) {
MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
unsigned NCases = SI->getNumCases();
// Fold the case metadata into the default if there will be any branches
@@ -154,7 +153,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Weights.push_back(CI->getValue().getZExtValue());
}
// Merge weight of this case to the default weight.
- unsigned idx = i.getCaseIndex();
+ unsigned idx = i->getCaseIndex();
Weights[0] += Weights[idx+1];
// Remove weight for this case.
std::swap(Weights[idx+1], Weights.back());
@@ -165,15 +164,19 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
- SI->removeCase(i);
- --i; --e;
+ i = SI->removeCase(i);
+ e = SI->case_end();
continue;
}
// Otherwise, check to see if the switch only branches to one destination.
// We do this by reseting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr;
+ if (i->getCaseSuccessor() != TheOnlyDest)
+ TheOnlyDest = nullptr;
+
+ // Increment this iterator as we haven't removed the case.
+ ++i;
}
if (CI && !TheOnlyDest) {
@@ -209,7 +212,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
- SwitchInst::CaseIt FirstCase = SI->case_begin();
+ auto FirstCase = *SI->case_begin();
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
FirstCase.getCaseValue(), "cond");
@@ -287,7 +290,15 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
///
bool llvm::isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
- if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+ if (!I->use_empty())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
+bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (isa<TerminatorInst>(I))
+ return false;
// We don't want the landingpad-like instructions removed by anything this
// general.
@@ -307,7 +318,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
return true;
}
- if (!I->mayHaveSideEffects()) return true;
+ if (!I->mayHaveSideEffects())
+ return true;
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
@@ -334,7 +346,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
}
}
- if (isAllocLikeFn(I, TLI)) return true;
+ if (isAllocLikeFn(I, TLI))
+ return true;
if (CallInst *CI = isFreeCall(I, TLI))
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
@@ -1075,11 +1088,11 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
// Since we can't guarantee that the original dbg.declare instrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
- DbgValueList DbgValues;
- FindAllocaDbgValues(DbgValues, APN);
- for (auto DVI : DbgValues) {
- assert (DVI->getValue() == APN);
- assert (DVI->getOffset() == 0);
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ findDbgValues(DbgValues, APN);
+ for (auto *DVI : DbgValues) {
+ assert(DVI->getValue() == APN);
+ assert(DVI->getOffset() == 0);
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
@@ -1241,9 +1254,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
-/// FindAllocaDbgValues - Finds the llvm.dbg.value intrinsics describing the
-/// alloca 'V', if any.
-void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) {
+void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
@@ -1251,36 +1262,32 @@ void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
- Expr.push_back(dwarf::DW_OP_deref);
-}
-
-static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) {
+static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
if (Offset > 0) {
- Expr.push_back(dwarf::DW_OP_plus);
- Expr.push_back(Offset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Offset);
} else if (Offset < 0) {
- Expr.push_back(dwarf::DW_OP_minus);
- Expr.push_back(-Offset);
+ Ops.push_back(dwarf::DW_OP_minus);
+ Ops.push_back(-Offset);
}
}
-static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder,
- DIExpression *DIExpr, bool Deref,
- int Offset) {
+/// Prepend \p DIExpr with a deref and offset operation.
+static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
+ bool Deref, int64_t Offset) {
if (!Deref && !Offset)
return DIExpr;
// Create a copy of the original DIDescriptor for user variable, prepending
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
- SmallVector<uint64_t, 4> NewDIExpr;
+ SmallVector<uint64_t, 4> Ops;
if (Deref)
- DIExprAddDeref(NewDIExpr);
- DIExprAddOffset(NewDIExpr, Offset);
+ Ops.push_back(dwarf::DW_OP_deref);
+ appendOffset(Ops, Offset);
if (DIExpr)
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
- return Builder.createExpression(NewDIExpr);
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ return Builder.createExpression(Ops);
}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
@@ -1294,7 +1301,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset);
+ DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);
// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
@@ -1326,11 +1333,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
// Insert the offset immediately after the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset) {
- SmallVector<uint64_t, 4> NewDIExpr;
- DIExprAddDeref(NewDIExpr);
- DIExprAddOffset(NewDIExpr, Offset);
- NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
- DIExpr = Builder.createExpression(NewDIExpr);
+ SmallVector<uint64_t, 4> Ops;
+ Ops.push_back(dwarf::DW_OP_deref);
+ appendOffset(Ops, Offset);
+ Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
+ DIExpr = Builder.createExpression(Ops);
}
Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
@@ -1349,6 +1356,53 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
}
}
+void llvm::salvageDebugInfo(Instruction &I) {
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ auto &M = *I.getModule();
+
+ auto MDWrap = [&](Value *V) {
+ return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
+ };
+
+ if (isa<BitCastInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
+ // to use the cast's source.
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ unsigned BitWidth =
+ M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
+ APInt Offset(BitWidth, 0);
+ // Rewrite a constant GEP into a DIExpression.
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ // GEP offsets are i32 and thus always fit into an int64_t.
+ DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+ } else if (isa<LoadInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Rewrite the load into DW_OP_deref.
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+}
+
unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of
@@ -2068,9 +2122,9 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
CallInst *CI, const TargetLibraryInfo *TLI) {
Function *F = CI->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (F && !F->hasLocalLinkage() && F->hasName() &&
TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
!F->doesNotAccessMemory())
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
}
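
The new salvageDebugInfo helper above rewrites llvm.dbg.value users of a bitcast, constant GEP, or load so they refer to the instruction's source operand. A minimal usage sketch of a hypothetical caller, assuming the declaration lives in llvm/Transforms/Utils/Local.h next to the other helpers here:

    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/Local.h"

    // Hypothetical helper: salvage any dbg.value users of I, then erase I.
    static void eraseAndSalvage(llvm::Instruction &I) {
      llvm::salvageDebugInfo(I); // rewrite dbg.values in terms of I's operand
      I.eraseFromParent();
    }
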
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 00cda2af00c6..e7ba19665d59 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -645,14 +645,7 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- bool UniqueExit = true;
- if (!ExitBlocks.empty())
- for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] != ExitBlocks[0]) {
- UniqueExit = false;
- break;
- }
- if (UniqueExit) {
+ if (ExitBlockSet.size() == 1) {
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
@@ -735,6 +728,17 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA) {
bool Changed = false;
+#ifndef NDEBUG
+ // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+ // form.
+ if (PreserveLCSSA) {
+ assert(DT && "DT not available.");
+ assert(LI && "LI not available.");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Requested to preserve LCSSA, but it's already broken.");
+ }
+#endif
+
// Worklist maintains our depth-first queue of loops in this nest to process.
SmallVector<Loop *, 4> Worklist;
Worklist.push_back(L);
@@ -814,15 +818,6 @@ bool LoopSimplify::runOnFunction(Function &F) {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-#ifndef NDEBUG
- if (PreserveLCSSA) {
- assert(DT && "DT not available.");
- assert(LI && "LI not available.");
- bool InLCSSA = all_of(
- *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
- assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken.");
- }
-#endif
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
@@ -846,17 +841,14 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
- // FIXME: This pass should verify that the loops on which it's operating
- // are in canonical SSA form, and that the pass itself preserves this form.
+ // Note that we don't preserve LCSSA in the new PM; if you need it, run LCSSA
+ // after simplifying the loops.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
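
Since the new-PM LoopSimplifyPass above no longer preserves LCSSA, a pipeline that needs both has to schedule LCSSA explicitly afterwards. A minimal sketch, assuming the standard pass headers:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Utils/LCSSA.h"
    #include "llvm/Transforms/Utils/LoopSimplify.h"

    // Canonicalize loops, then restore LCSSA form for passes that rely on it.
    void addLoopCanonicalization(llvm::FunctionPassManager &FPM) {
      FPM.addPass(llvm::LoopSimplifyPass());
      FPM.addPass(llvm::LCSSAPass());
    }
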
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e346ebd6a000..3c669ce644e2 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -51,6 +52,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
"with epilog instead of prolog."));
+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+ cl::desc("Verify domtree after unrolling"),
+#ifdef NDEBUG
+ cl::init(false)
+#else
+ cl::init(true)
+#endif
+ );
+
/// Convert the instruction operands from referencing the current values into
/// those specified by VMap.
static inline void remapInstruction(Instruction *I,
@@ -205,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
}
}
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant: in this case the epilog will keep the PHI starting from the
+/// constant, but the prolog will convert it to a non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (Instruction &BBI : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ if (!PN)
+ break;
+ if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
@@ -296,8 +346,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Count = TripCount;
// Don't enter the unroll code if there is nothing to do.
- if (TripCount == 0 && Count < 2 && PeelCount == 0)
+ if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+ DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return false;
+ }
assert(Count > 0);
assert(TripMultiple > 0);
@@ -330,7 +382,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"and peeling for the same loop");
if (PeelCount)
- peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
+ peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
@@ -346,14 +398,22 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"convergent operation.");
});
+ bool EpilogProfitability =
+ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+ : isEpilogProfitable(L);
+
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- UnrollRuntimeEpilog, LI, SE, DT,
+ EpilogProfitability, LI, SE, DT,
PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
- else
+ else {
+ DEBUG(
+ dbgs() << "Won't unroll; remainder loop could not be generated "
+ "when assuming runtime trip count\n");
return false;
+ }
}
// Notify ScalarEvolution that the loop will be substantially changed,
@@ -446,6 +506,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
+ if (Header->getParent()->isDebugInfoForProfiling())
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (const DILocation *DIL = I.getDebugLoc())
+ I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -456,19 +522,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
+ assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
+ "Header should not be in a sub-loop");
// Tell LI about New.
- if (*BB == Header) {
- assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
- L->addBasicBlockToLoop(New, *LI);
- } else {
- const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
- if (OldLoop) {
- LoopsToSimplify.insert(NewLoops[OldLoop]);
+ const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+ if (OldLoop) {
+ LoopsToSimplify.insert(NewLoops[OldLoop]);
- // Forget the old loop, since its inputs may have changed.
- if (SE)
- SE->forgetLoop(OldLoop);
- }
+ // Forget the old loop, since its inputs may have changed.
+ if (SE)
+ SE->forgetLoop(OldLoop);
}
if (*BB == Header)
@@ -615,14 +678,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Term->eraseFromParent();
}
}
+
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
- // iterations too. Thus, the new idom of the block will be the nearest
- // common dominator of the previous idom and common dominator of all copies of
- // the previous idom. This is equivalent to the nearest common dominator of
- // the previous idom and the first latch, which dominates all copies of the
- // previous idom.
+ // iterations too.
if (DT && Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
@@ -632,12 +692,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
+ BasicBlock *NewIDom;
+ if (BB == LatchBlock) {
+ // The latch is special because we emit unconditional branches in
+ // some cases where the original loop contained a conditional branch.
+ // Since the latch is always at the bottom of the loop, if the latch
+ // dominated an exit before unrolling, the new dominator of that exit
+ // must also be a latch. Specifically, the dominator is the first
+ // latch which ends in a conditional branch, or the last latch if
+ // there is no such latch.
+ NewIDom = Latches.back();
+ for (BasicBlock *IterLatch : Latches) {
+ TerminatorInst *Term = IterLatch->getTerminator();
+ if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
+ NewIDom = IterLatch;
+ break;
+ }
+ }
+ } else {
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
+ }
for (auto *ChildBB : ChildrenToUpdate)
DT->changeImmediateDominator(ChildBB, NewIDom);
}
}
+ if (DT && UnrollVerifyDomtree)
+ DT->verifyDomTree();
+
// Merge adjacent basic blocks, if possible.
SmallPtrSet<Loop *, 4> ForgottenLoops;
for (BasicBlock *Latch : Latches) {
@@ -655,13 +741,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
- // FIXME: We only preserve DT info for complete unrolling now. Incrementally
- // updating domtree after partial loop unrolling should also be easy.
- if (DT && !CompletelyUnroll)
- DT->recalculate(*L->getHeader()->getParent());
- else if (DT)
- DEBUG(DT->verifyDomTree());
-
// Simplify any new induction variables in the partially unrolled loop.
if (SE && !CompletelyUnroll && Count > 1) {
SmallVector<WeakVH, 16> DeadInsts;
@@ -721,29 +800,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
if (DT) {
- if (!OuterL && !CompletelyUnroll)
- OuterL = L;
if (OuterL) {
// OuterL includes all loops for which we can break loop-simplify, so
// it's sufficient to simplify only it (it'll recursively simplify inner
// loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by markAsRemoved.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
+ }
+
// TODO: That potentially might be compile-time expensive. We should try
// to fix the loop-simplified form incrementally.
simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
-
- // LCSSA must be performed on the outermost affected loop. The unrolled
- // loop's last loop latch is guaranteed to be in the outermost loop after
- // LoopInfo's been updated by markAsRemoved.
- Loop *LatchLoop = LI->getLoopFor(Latches.back());
- if (!OuterL->contains(LatchLoop))
- while (OuterL->getParentLoop() != LatchLoop)
- OuterL = OuterL->getParentLoop();
-
- if (NeedToFixLCSSA)
- formLCSSARecursively(*OuterL, *DT, LI, SE);
- else
- assert(OuterL->isLCSSAForm(*DT) &&
- "Loops should be in LCSSA form after loop-unroll.");
} else {
// Simplify loops for which we might've broken loop-simplify form.
for (Loop *SubLoop : LoopsToSimplify)
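
The EpilogProfitability computation above uses a common cl::opt idiom: an explicit command-line setting wins, otherwise a heuristic decides. A small stand-alone sketch of that pattern (the flag name and heuristic here are hypothetical):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool>
        UseFastPath("use-fast-path-example", llvm::cl::Hidden,
                    llvm::cl::desc("Hypothetical override flag"),
                    llvm::cl::init(false));

    static bool shouldUseFastPath(bool HeuristicSaysYes) {
      // getNumOccurrences() is non-zero only if the user passed the flag on
      // the command line, so an explicit setting overrides the heuristic.
      return UseFastPath.getNumOccurrences() ? UseFastPath : HeuristicSaysYes;
    }
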
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 842cf31f2e3d..73c14f5606b7 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -28,6 +28,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -55,12 +56,20 @@ static bool canPeel(Loop *L) {
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
+ // Don't try to peel loops where the latch is not the exiting block.
+ // This can be an indication of two different things:
+ // 1) The loop is not rotated.
+ // 2) The loop contains irreducible control flow that involves the latch.
+ if (L->getLoopLatch() != L->getExitingBlock())
+ return false;
+
return true;
}
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP) {
+ TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned &TripCount) {
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -69,6 +78,39 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
+ // Try to find a PHI node that takes a loop-invariant value as its input
+ // from its only back edge. If there is such a PHI, peeling one iteration
+ // from the loop is profitable, because starting from the second iteration
+ // we will have the invariant instead of this PHI.
+ if (LoopSize <= UP.Threshold) {
+ BasicBlock *BackEdge = L->getLoopLatch();
+ assert(BackEdge && "Loop is not in simplified form?");
+ BasicBlock *Header = L->getHeader();
+ // Iterate over Phis to find one with invariant input on back edge.
+ bool FoundCandidate = false;
+ PHINode *Phi;
+ for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
+ Phi = cast<PHINode>(&*BI);
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+ if (L->isLoopInvariant(Input)) {
+ FoundCandidate = true;
+ break;
+ }
+ }
+ if (FoundCandidate) {
+ DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
+ << " because starting from 2nd iteration it is always"
+ << " an invariant\n");
+ UP.PeelCount = 1;
+ return;
+ }
+ }
+
+ // Bail out if we know the statically calculated trip count.
+ // In this case we prefer partial unrolling instead.
+ if (TripCount)
+ return;
+
// If the user provided a peel count, use that.
bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
if (UserPeelCount) {
@@ -164,7 +206,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Exit,
SmallVectorImpl<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap, LoopInfo *LI) {
+ ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -185,6 +228,17 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
+
+ // If dominator tree is available, insert nodes to represent cloned blocks.
+ if (DT) {
+ if (Header == *BB)
+ DT->addNewBlock(NewBB, InsertTop);
+ else {
+ DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+ // VMap must contain entry for IDom, as the iteration order is RPO.
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+ }
+ }
}
// Hook-up the control flow for the newly inserted blocks.
@@ -198,11 +252,13 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// The backedge now goes to the "bottom", which is either the loop's real
// header (for the last peeled iteration) or the copied header of the next
// iteration (for every other iteration)
- BranchInst *LatchBR =
- cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator());
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
LatchBR->setSuccessor(HeaderIdx, InsertBot);
LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ if (DT)
+ DT->changeImmediateDominator(InsertBot, NewLatch);
// The new copy of the loop body starts with a bunch of PHI nodes
// that pick an incoming value from either the preheader, or the previous
@@ -257,7 +313,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA) {
+ AssumptionCache *AC, bool PreserveLCSSA) {
if (!canPeel(L))
return false;
@@ -358,7 +414,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
- NewBlocks, LoopBlocks, VMap, LVMap, LI);
+ NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+
+ // Remap to use values from the current iteration instead of the
+ // previous one.
+ remapInstructionsInBlocks(NewBlocks, VMap);
+
+ if (DT) {
+ // Latches of the cloned iterations dominate the loop exit, so the idom
+ // of the exit is the latch of the first cloned iteration, just as the
+ // original preheader dominates the original loop body.
+ if (Iter == 0)
+ DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifndef NDEBUG
+ if (VerifyDomInfo)
+ DT->verifyDomTree();
+#endif
+ }
+
updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
PeelCount, ExitWeight);
@@ -369,10 +442,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
F->getBasicBlockList().splice(InsertTop->getIterator(),
F->getBasicBlockList(),
NewBlocks[0]->getIterator(), F->end());
-
- // Remap to use values from the current iteration instead of the
- // previous one.
- remapInstructionsInBlocks(NewBlocks, VMap);
}
// Now adjust the phi nodes in the loop header to get their initial values
@@ -405,9 +474,16 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
}
// If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop())
+ if (Loop *ParentLoop = L->getParentLoop()) {
SE->forgetLoop(ParentLoop);
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
+ } else {
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
+ }
+
NumPeeled++;
return true;
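
The peeling heuristic added above (a PHI whose back-edge input is loop-invariant) can be pictured at the C level. This conceptual sketch uses made-up names and is not LLVM API code:

    // After peeling the first iteration, every remaining iteration observes
    // X == Inv, so the header PHI collapses to the invariant value.
    void example(int Init, int Inv, int N, void (*Use)(int)) {
      int X = Init;               // PHI: [Init, preheader], [Inv, latch]
      for (int I = 0; I < N; ++I) {
        Use(X);
        X = Inv;                  // loop-invariant back-edge input
      }
    }
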
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d3ea1564115b..85db734fb182 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(Exit, PrologExit);
}
/// Connect the unrolling epilog code to the original loop.
@@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
IRBuilder<> B(InsertPt);
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
assert(Exit && "Loop must have a single exit block only");
- // Split the exit to maintain loop canonicalization guarantees
+ // Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(Exit, NewExit);
+
+ // Split the main loop exit to maintain canonicalization guarantees.
+ SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ PreserveLCSSA);
}
/// Create a clone of the blocks in a loop and connect them together.
@@ -284,27 +293,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
- if (CreateRemainderLoop) {
- NewLoop = new Loop();
- if (ParentLoop)
- ParentLoop->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
- }
-
NewLoopsMap NewLoops;
- if (NewLoop)
- NewLoops[L] = NewLoop;
- else if (ParentLoop)
+ NewLoops[ParentLoop] = ParentLoop;
+ if (!CreateRemainderLoop)
NewLoops[L] = ParentLoop;
// For each block in the original loop, create a new copy,
@@ -312,7 +311,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
-
+
// If we're unrolling the outermost loop, there's no remainder loop,
// and this block isn't in a nested loop, then the new block is not
// in any loop. Otherwise, add it to loopinfo.
@@ -326,6 +325,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
InsertTop->getTerminator()->setSuccessor(0, NewBB);
}
+ if (DT) {
+ if (Header == *BB) {
+ // The header is dominated by the preheader.
+ DT->addNewBlock(NewBB, InsertTop);
+ } else {
+ // Copy information from original loop to unrolled loop.
+ BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+ }
+ }
+
if (Latch == *BB) {
// For the last block, if CreateRemainderLoop is false, create a direct
// jump to InsertBot. If not, create a loop back to cloned head.
@@ -376,7 +386,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
NewPHI->setIncomingValue(idx, V);
}
}
- if (NewLoop) {
+ if (CreateRemainderLoop) {
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
// Add unroll disable metadata to disable future unrolling for this loop.
SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
@@ -599,6 +611,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Branch to either remainder (extra iterations) loop or unrolling loop.
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
+ if (DT) {
+ if (UseEpilogRemainder)
+ DT->changeImmediateDominator(NewExit, PreHeader);
+ else
+ DT->changeImmediateDominator(PrologExit, PreHeader);
+ }
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog/epilog code
@@ -623,7 +641,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
- InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
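
The dominator-tree updates threaded through CloneLoopBlocks above follow one idiom: because blocks are cloned in reverse post-order, the clone of each block's idom is already in VMap by the time the block itself is cloned. A condensed sketch of that per-block step (the helper name is hypothetical):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Support/Casting.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    // The cloned header hangs off the new preheader; every other clone hangs
    // off the clone of its original immediate dominator.
    static void addClonedBlockToDomTree(llvm::DominatorTree &DT,
                                        llvm::ValueToValueMapTy &VMap,
                                        llvm::BasicBlock *BB,
                                        llvm::BasicBlock *NewBB,
                                        llvm::BasicBlock *Header,
                                        llvm::BasicBlock *InsertTop) {
      if (BB == Header) {
        DT.addNewBlock(NewBB, InsertTop);
        return;
      }
      llvm::BasicBlock *IDom = DT.getNode(BB)->getIDom()->getBlock();
      DT.addNewBlock(NewBB, llvm::cast<llvm::BasicBlock>(VMap[IDom]));
    }
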
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index c8efa9efc7f3..175d013a011d 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -230,8 +230,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
- // - By one instruction outside of the loop (safe).
- // - By further instructions outside of the loop (not safe).
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
// - By an instruction that is not part of the reduction (not safe).
// This is either:
// * An instruction type other than PHI or the reduction operation.
@@ -297,10 +298,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
- // Exit if you find multiple outside users or if the header phi node is
- // being used. In this case the user uses the value of the previous
- // iteration, in which case we would loose "VF-1" iterations of the
- // reduction operation if we vectorize.
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+ // Exit if you find multiple values used outside or if the header phi
+ // node is being used. In this case the user uses the value of the
+ // previous iteration, in which case we would lose "VF-1" iterations of
+ // the reduction operation if we vectorize.
if (ExitInstruction != nullptr || Cur == Phi)
return false;
@@ -547,13 +553,14 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
return false;
- // Ensure every user of the phi node is dominated by the previous value. The
- // dominance requirement ensures the loop vectorizer will not need to
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
// vectorize the initial value prior to the first iteration of the loop.
for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U))
+ if (auto *I = dyn_cast<Instruction>(U)) {
if (!DT->dominates(Previous, I))
return false;
+ }
return true;
}
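
The relaxed exit-user rule above (several users outside the loop are allowed as long as they all use the same reduction value) corresponds to source like the following conceptual example (names are made up):

    // Both outside uses consume the same final value of Sum, so the
    // reduction is still recognizable.
    int sumWithTwoOutsideUses(const int *A, int N, int *CopyOut) {
      int Sum = 0;
      for (int I = 0; I < N; ++I)
        Sum += A[I];
      *CopyOut = Sum;   // first outside use
      return Sum;       // second outside use of the same value
    }
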
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
new file mode 100644
index 000000000000..c7cb561b5e21
--- /dev/null
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -0,0 +1,231 @@
+//===- LowerMemIntrinsics.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+void llvm::createMemCpyLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
+ F, NewBB);
+
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // SrcAddr and DstAddr are expected to be pointer types,
+ // so no check is made here.
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ // Cast pointers to (char *)
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
+
+ // load from SrcAddr+LoopIndex
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from backedge is (LoopIndex + 1)
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
+
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+static void createMemMoveLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+
+ // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = InsertBefore->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
+
+static void createMemSetLoop(Instruction *InsertBefore,
+ Value *DstAddr, Value *CopyLen, Value *SetValue,
+ unsigned Align, bool IsVolatile) {
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ OrigBB->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB
+ = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
+
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // Cast pointer to the type of value getting stored
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
+ LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
+
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ IsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
+
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
+ createMemCpyLoop(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile());
+}
+
+void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
+ createMemMoveLoop(/* InsertBefore */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcAlign */ Memmove->getAlignment(),
+ /* DestAlign */ Memmove->getAlignment(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile());
+}
+
+void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
+ createMemSetLoop(/* InsertBefore */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Alignment */ Memset->getAlignment(),
+ Memset->isVolatile());
+}
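
For reference, createMemCpyLoop above emits IR roughly equivalent to the following byte-wise copy. Note that control falls straight from the split block into the loop body, so it behaves like a do/while and assumes a non-zero length. This is only a C-level rendition in the spirit of the memmove comment earlier in the file:

    static void memcpyAsEmittedLoop(unsigned char *Dst, const unsigned char *Src,
                                    unsigned long long Len) {
      unsigned long long Index = 0;
      do {
        Dst[Index] = Src[Index];   // i8 load from Src+Index, store to Dst+Index
        ++Index;                   // NewIndex = LoopIndex + 1
      } while (Index < Len);       // backedge while NewIndex < CopyLen
    }
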
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 75cd3bc8b2bf..b375d51005d5 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -356,10 +356,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
// Start with "simple" cases
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
- Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
- i.getCaseSuccessor()));
-
+ for (auto Case : SI->cases())
+ Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 24b3b12930ac..b659a2e4463f 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -46,7 +46,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
if (Allocas.empty())
break;
- PromoteMemToReg(Allocas, DT, nullptr, &AC);
+ PromoteMemToReg(Allocas, DT, &AC);
NumPromoted += Allocas.size();
Changed = true;
}
@@ -59,8 +59,9 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
if (!promoteMemoryToRegister(F, DT, AC))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
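
The PreservedAnalyses change above uses the new-PM idiom for "this pass may change values but never the CFG". A compact sketch of that shape (the rewriting helper is hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"

    bool rewriteWithoutTouchingCFG(llvm::Function &F); // hypothetical transform

    llvm::PreservedAnalyses runExample(llvm::Function &F,
                                       llvm::FunctionAnalysisManager &AM) {
      if (!rewriteWithoutTouchingCFG(F))
        return llvm::PreservedAnalyses::all();
      llvm::PreservedAnalyses PA;
      PA.preserveSet<llvm::CFGAnalyses>();   // CFG-only analyses stay valid
      return PA;
    }
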
diff --git a/lib/Transforms/Utils/MemorySSA.cpp b/lib/Transforms/Utils/MemorySSA.cpp
deleted file mode 100644
index 1ce4225f09cc..000000000000
--- a/lib/Transforms/Utils/MemorySSA.cpp
+++ /dev/null
@@ -1,2305 +0,0 @@
-//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------===//
-//
-// This file implements the MemorySSA class.
-//
-//===----------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/MemorySSA.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Transforms/Scalar.h"
-#include <algorithm>
-
-#define DEBUG_TYPE "memoryssa"
-using namespace llvm;
-STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups");
-STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits");
-STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts");
-
-INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
- true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
- true)
-
-INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
- "Memory SSA Printer", false, false)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
- "Memory SSA Printer", false, false)
-
-static cl::opt<unsigned> MaxCheckLimit(
- "memssa-check-limit", cl::Hidden, cl::init(100),
- cl::desc("The maximum number of stores/phis MemorySSA"
- "will consider trying to walk past (default = 100)"));
-
-static cl::opt<bool>
- VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
- cl::desc("Verify MemorySSA in legacy printer pass."));
-
-namespace llvm {
-/// \brief An assembly annotator class to print Memory SSA information in
-/// comments.
-class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
- friend class MemorySSA;
- const MemorySSA *MSSA;
-
-public:
- MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
-
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {
- if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
- OS << "; " << *MA << "\n";
- }
-
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) {
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
- OS << "; " << *MA << "\n";
- }
-};
-}
-
-namespace {
-/// Our current alias analysis API differentiates heavily between calls and
-/// non-calls, and functions called on one usually assert on the other.
-/// This class encapsulates the distinction to simplify other code that wants
-/// "Memory affecting instructions and related data" to use as a key.
-/// For example, this class is used as a densemap key in the use optimizer.
-class MemoryLocOrCall {
-public:
- MemoryLocOrCall() : IsCall(false) {}
- MemoryLocOrCall(MemoryUseOrDef *MUD)
- : MemoryLocOrCall(MUD->getMemoryInst()) {}
- MemoryLocOrCall(const MemoryUseOrDef *MUD)
- : MemoryLocOrCall(MUD->getMemoryInst()) {}
-
- MemoryLocOrCall(Instruction *Inst) {
- if (ImmutableCallSite(Inst)) {
- IsCall = true;
- CS = ImmutableCallSite(Inst);
- } else {
- IsCall = false;
- // There is no such thing as a memorylocation for a fence inst, and it is
- // unique in that regard.
- if (!isa<FenceInst>(Inst))
- Loc = MemoryLocation::get(Inst);
- }
- }
-
- explicit MemoryLocOrCall(const MemoryLocation &Loc)
- : IsCall(false), Loc(Loc) {}
-
- bool IsCall;
- ImmutableCallSite getCS() const {
- assert(IsCall);
- return CS;
- }
- MemoryLocation getLoc() const {
- assert(!IsCall);
- return Loc;
- }
-
- bool operator==(const MemoryLocOrCall &Other) const {
- if (IsCall != Other.IsCall)
- return false;
-
- if (IsCall)
- return CS.getCalledValue() == Other.CS.getCalledValue();
- return Loc == Other.Loc;
- }
-
-private:
- union {
- ImmutableCallSite CS;
- MemoryLocation Loc;
- };
-};
-}
-
-namespace llvm {
-template <> struct DenseMapInfo<MemoryLocOrCall> {
- static inline MemoryLocOrCall getEmptyKey() {
- return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
- }
- static inline MemoryLocOrCall getTombstoneKey() {
- return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
- }
- static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
- if (MLOC.IsCall)
- return hash_combine(MLOC.IsCall,
- DenseMapInfo<const Value *>::getHashValue(
- MLOC.getCS().getCalledValue()));
- return hash_combine(
- MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
- }
- static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
- return LHS == RHS;
- }
-};
-
-enum class Reorderability { Always, IfNoAlias, Never };
-
-/// This does one-way checks to see if Use could theoretically be hoisted above
-/// MayClobber. This will not check the other way around.
-///
-/// This assumes that, for the purposes of MemorySSA, Use comes directly after
-/// MayClobber, with no potentially clobbering operations in between them.
-/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
-static Reorderability getLoadReorderability(const LoadInst *Use,
- const LoadInst *MayClobber) {
- bool VolatileUse = Use->isVolatile();
- bool VolatileClobber = MayClobber->isVolatile();
- // Volatile operations may never be reordered with other volatile operations.
- if (VolatileUse && VolatileClobber)
- return Reorderability::Never;
-
- // The lang ref allows reordering of volatile and non-volatile operations.
- // Whether an aliasing nonvolatile load and volatile load can be reordered,
- // though, is ambiguous. Because it may not be best to exploit this ambiguity,
- // we only allow volatile/non-volatile reordering if the volatile and
- // non-volatile operations don't alias.
- Reorderability Result = VolatileUse || VolatileClobber
- ? Reorderability::IfNoAlias
- : Reorderability::Always;
-
- // If a load is seq_cst, it cannot be moved above other loads. If its ordering
- // is weaker, it can be moved above other loads. We just need to be sure that
- // MayClobber isn't an acquire load, because loads can't be moved above
- // acquire loads.
- //
- // Note that this explicitly *does* allow the free reordering of monotonic (or
- // weaker) loads of the same address.
- bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
- bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
- AtomicOrdering::Acquire);
- if (SeqCstUse || MayClobberIsAcquire)
- return Reorderability::Never;
- return Result;
-}
-
-static bool instructionClobbersQuery(MemoryDef *MD,
- const MemoryLocation &UseLoc,
- const Instruction *UseInst,
- AliasAnalysis &AA) {
- Instruction *DefInst = MD->getMemoryInst();
- assert(DefInst && "Defining instruction not actually an instruction");
-
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
- // These intrinsics will show up as affecting memory, but they are just
- // markers.
- switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::assume:
- return false;
- default:
- break;
- }
- }
-
- ImmutableCallSite UseCS(UseInst);
- if (UseCS) {
- ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
- return I != MRI_NoModRef;
- }
-
- if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
- if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
- switch (getLoadReorderability(UseLoad, DefLoad)) {
- case Reorderability::Always:
- return false;
- case Reorderability::Never:
- return true;
- case Reorderability::IfNoAlias:
- return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
- }
- }
- }
-
- return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
-}
-
-static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
- const MemoryLocOrCall &UseMLOC,
- AliasAnalysis &AA) {
- // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
- // to exist while MemoryLocOrCall is pushed through places.
- if (UseMLOC.IsCall)
- return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
- AA);
- return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
- AA);
-}
-
-// Return true when MD may alias MU, return false otherwise.
-bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
- AliasAnalysis &AA) {
- return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
-}
-}
-
-namespace {
-struct UpwardsMemoryQuery {
- // True if our original query started off as a call
- bool IsCall;
- // The pointer location we started the query with. This will be empty if
- // IsCall is true.
- MemoryLocation StartingLoc;
- // This is the instruction we were querying about.
- const Instruction *Inst;
- // The MemoryAccess we actually got called with, used to test local domination
- const MemoryAccess *OriginalAccess;
-
- UpwardsMemoryQuery()
- : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
-
- UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
- : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
- if (!IsCall)
- StartingLoc = MemoryLocation::get(Inst);
- }
-};
-
-static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
- AliasAnalysis &AA) {
- Instruction *Inst = MD->getMemoryInst();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
- default:
- return false;
- }
- }
- return false;
-}
-
-static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
- const Instruction *I) {
- // If the memory can't be changed, then loads of the memory can't be
- // clobbered.
- //
- // FIXME: We should handle invariant groups, as well. It's a bit harder,
- // because we need to pay close attention to invariant group barriers.
- return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
- AA.pointsToConstantMemory(I));
-}
-
-/// Cache for our caching MemorySSA walker.
-class WalkerCache {
- DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses;
- DenseMap<const MemoryAccess *, MemoryAccess *> Calls;
-
-public:
- MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc,
- bool IsCall) const {
- ++NumClobberCacheLookups;
- MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc});
- if (R)
- ++NumClobberCacheHits;
- return R;
- }
-
- bool insert(const MemoryAccess *MA, MemoryAccess *To,
- const MemoryLocation &Loc, bool IsCall) {
- // This is fine for Phis, since there are times where we can't optimize
- // them. Making a def its own clobber is never correct, though.
- assert((MA != To || isa<MemoryPhi>(MA)) &&
- "Something can't clobber itself!");
-
- ++NumClobberCacheInserts;
- bool Inserted;
- if (IsCall)
- Inserted = Calls.insert({MA, To}).second;
- else
- Inserted = Accesses.insert({{MA, Loc}, To}).second;
-
- return Inserted;
- }
-
- bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) {
- return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc});
- }
-
- void clear() {
- Accesses.clear();
- Calls.clear();
- }
-
- bool contains(const MemoryAccess *MA) const {
- for (auto &P : Accesses)
- if (P.first.first == MA || P.second == MA)
- return true;
- for (auto &P : Calls)
- if (P.first == MA || P.second == MA)
- return true;
- return false;
- }
-};
-
-/// Walks the defining uses of MemoryDefs. Stops after we hit something that has
-/// no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when comparing
-/// against a null def_chain_iterator, this will compare equal only after
-/// walking said Phi/liveOnEntry.
-struct def_chain_iterator
- : public iterator_facade_base<def_chain_iterator, std::forward_iterator_tag,
- MemoryAccess *> {
- def_chain_iterator() : MA(nullptr) {}
- def_chain_iterator(MemoryAccess *MA) : MA(MA) {}
-
- MemoryAccess *operator*() const { return MA; }
-
- def_chain_iterator &operator++() {
- // N.B. liveOnEntry has a null defining access.
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MA = MUD->getDefiningAccess();
- else
- MA = nullptr;
- return *this;
- }
-
- bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
-
-private:
- MemoryAccess *MA;
-};
-
-static iterator_range<def_chain_iterator>
-def_chain(MemoryAccess *MA, MemoryAccess *UpTo = nullptr) {
-#ifdef EXPENSIVE_CHECKS
- assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator()) &&
- "UpTo isn't in the def chain!");
-#endif
- return make_range(def_chain_iterator(MA), def_chain_iterator(UpTo));
-}
-
-/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
-/// inbetween `Start` and `ClobberAt` can clobbers `Start`.
-///
-/// This is meant to be as simple and self-contained as possible. Because it
-/// uses no cache, etc., it can be relatively expensive.
-///
-/// \param Start The MemoryAccess that we want to walk from.
-/// \param ClobberAt A clobber for Start.
-/// \param StartLoc The MemoryLocation for Start.
-/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
-/// \param Query The UpwardsMemoryQuery we used for our search.
-/// \param AA The AliasAnalysis we used for our search.
-static void LLVM_ATTRIBUTE_UNUSED
-checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
- const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
- assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
-
- if (MSSA.isLiveOnEntryDef(Start)) {
- assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
- "liveOnEntry must clobber itself");
- return;
- }
-
- bool FoundClobber = false;
- DenseSet<MemoryAccessPair> VisitedPhis;
- SmallVector<MemoryAccessPair, 8> Worklist;
- Worklist.emplace_back(Start, StartLoc);
- // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
- // is found, complain.
- while (!Worklist.empty()) {
- MemoryAccessPair MAP = Worklist.pop_back_val();
- // All we care about is that nothing from Start to ClobberAt clobbers Start.
- // We learn nothing from revisiting nodes.
- if (!VisitedPhis.insert(MAP).second)
- continue;
-
- for (MemoryAccess *MA : def_chain(MAP.first)) {
- if (MA == ClobberAt) {
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
-          // instructionClobbersQuery isn't free, so don't use `|=`, since that
-          // would prevent short-circuiting.
- //
- // Also, note that this can't be hoisted out of the `Worklist` loop,
- // since MD may only act as a clobber for 1 of N MemoryLocations.
- FoundClobber =
- FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
- instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
- }
- break;
- }
-
- // We should never hit liveOnEntry, unless it's the clobber.
- assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
-
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
- (void)MD;
- assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) &&
- "Found clobber before reaching ClobberAt!");
- continue;
- }
-
- assert(isa<MemoryPhi>(MA));
- Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
- }
- }
-
- // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
- // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
- assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
- "ClobberAt never acted as a clobber");
-}
-
-/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
-/// in one class.
-class ClobberWalker {
- /// Save a few bytes by using unsigned instead of size_t.
- using ListIndex = unsigned;
-
- /// Represents a span of contiguous MemoryDefs, potentially ending in a
- /// MemoryPhi.
- struct DefPath {
- MemoryLocation Loc;
- // Note that, because we always walk in reverse, Last will always dominate
- // First. Also note that First and Last are inclusive.
- MemoryAccess *First;
- MemoryAccess *Last;
- Optional<ListIndex> Previous;
-
- DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
- Optional<ListIndex> Previous)
- : Loc(Loc), First(First), Last(Last), Previous(Previous) {}
-
- DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
- Optional<ListIndex> Previous)
- : DefPath(Loc, Init, Init, Previous) {}
- };
-
- const MemorySSA &MSSA;
- AliasAnalysis &AA;
- DominatorTree &DT;
- WalkerCache &WC;
- UpwardsMemoryQuery *Query;
- bool UseCache;
-
- // Phi optimization bookkeeping
- SmallVector<DefPath, 32> Paths;
- DenseSet<ConstMemoryAccessPair> VisitedPhis;
- DenseMap<const BasicBlock *, MemoryAccess *> WalkTargetCache;
-
- void setUseCache(bool Use) { UseCache = Use; }
- bool shouldIgnoreCache() const {
- // UseCache will only be false when we're debugging, or when expensive
- // checks are enabled. In either case, we don't care deeply about speed.
- return LLVM_UNLIKELY(!UseCache);
- }
-
- void addCacheEntry(const MemoryAccess *What, MemoryAccess *To,
- const MemoryLocation &Loc) const {
-// EXPENSIVE_CHECKS because most of these queries are redundant.
-#ifdef EXPENSIVE_CHECKS
- assert(MSSA.dominates(To, What));
-#endif
- if (shouldIgnoreCache())
- return;
- WC.insert(What, To, Loc, Query->IsCall);
- }
-
- MemoryAccess *lookupCache(const MemoryAccess *MA, const MemoryLocation &Loc) {
- return shouldIgnoreCache() ? nullptr : WC.lookup(MA, Loc, Query->IsCall);
- }
-
- void cacheDefPath(const DefPath &DN, MemoryAccess *Target) const {
- if (shouldIgnoreCache())
- return;
-
- for (MemoryAccess *MA : def_chain(DN.First, DN.Last))
- addCacheEntry(MA, Target, DN.Loc);
-
- // DefPaths only express the path we walked. So, DN.Last could either be a
- // thing we want to cache, or not.
- if (DN.Last != Target)
- addCacheEntry(DN.Last, Target, DN.Loc);
- }
-
- /// Find the nearest def or phi that `From` can legally be optimized to.
- ///
- /// FIXME: Deduplicate this with MSSA::findDominatingDef. Ideally, MSSA should
- /// keep track of this information for us, and allow us O(1) lookups of this
- /// info.
- MemoryAccess *getWalkTarget(const MemoryPhi *From) {
- assert(From->getNumOperands() && "Phi with no operands?");
-
- BasicBlock *BB = From->getBlock();
- auto At = WalkTargetCache.find(BB);
- if (At != WalkTargetCache.end())
- return At->second;
-
- SmallVector<const BasicBlock *, 8> ToCache;
- ToCache.push_back(BB);
-
- MemoryAccess *Result = MSSA.getLiveOnEntryDef();
- DomTreeNode *Node = DT.getNode(BB);
- while ((Node = Node->getIDom())) {
-      auto At = WalkTargetCache.find(Node->getBlock());
- if (At != WalkTargetCache.end()) {
- Result = At->second;
- break;
- }
-
- auto *Accesses = MSSA.getBlockAccesses(Node->getBlock());
- if (Accesses) {
- auto Iter = find_if(reverse(*Accesses), [](const MemoryAccess &MA) {
- return !isa<MemoryUse>(MA);
- });
- if (Iter != Accesses->rend()) {
- Result = const_cast<MemoryAccess *>(&*Iter);
- break;
- }
- }
-
- ToCache.push_back(Node->getBlock());
- }
-
- for (const BasicBlock *BB : ToCache)
- WalkTargetCache.insert({BB, Result});
- return Result;
- }
-
- /// Result of calling walkToPhiOrClobber.
- struct UpwardsWalkResult {
- /// The "Result" of the walk. Either a clobber, the last thing we walked, or
- /// both.
- MemoryAccess *Result;
- bool IsKnownClobber;
- bool FromCache;
- };
-
- /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
- /// This will update Desc.Last as it walks. It will (optionally) also stop at
- /// StopAt.
- ///
-  /// Note that this does not test whether StopAt itself is a clobber.
- UpwardsWalkResult walkToPhiOrClobber(DefPath &Desc,
- MemoryAccess *StopAt = nullptr) {
- assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
-
- for (MemoryAccess *Current : def_chain(Desc.Last)) {
- Desc.Last = Current;
- if (Current == StopAt)
- return {Current, false, false};
-
- if (auto *MD = dyn_cast<MemoryDef>(Current))
- if (MSSA.isLiveOnEntryDef(MD) ||
- instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
- return {MD, true, false};
-
- // Cache checks must be done last, because if Current is a clobber, the
- // cache will contain the clobber for Current.
- if (MemoryAccess *MA = lookupCache(Current, Desc.Loc))
- return {MA, true, true};
- }
-
- assert(isa<MemoryPhi>(Desc.Last) &&
- "Ended at a non-clobber that's not a phi?");
- return {Desc.Last, false, false};
- }
-
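-  /// Queue up a new search for each incoming value of \p Phi: each new DefPath
-  /// starts at that incoming access, carries the corresponding MemoryLocation,
-  /// and records \p PriorNode as its predecessor.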
- void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
- ListIndex PriorNode) {
- auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}),
- upward_defs_end());
- for (const MemoryAccessPair &P : UpwardDefs) {
- PausedSearches.push_back(Paths.size());
- Paths.emplace_back(P.second, P.first, PriorNode);
- }
- }
-
- /// Represents a search that terminated after finding a clobber. This clobber
- /// may or may not be present in the path of defs from LastNode..SearchStart,
- /// since it may have been retrieved from cache.
- struct TerminatedPath {
- MemoryAccess *Clobber;
- ListIndex LastNode;
- };
-
- /// Get an access that keeps us from optimizing to the given phi.
- ///
-  /// PausedSearches is an array of indices into the Paths array. Its incoming
-  /// value is the indices of searches that stopped at the last phi
-  /// optimization target. On return, it is left in an unspecified state.
- ///
- /// If this returns None, NewPaused is a vector of searches that terminated
- /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
- Optional<TerminatedPath>
- getBlockingAccess(MemoryAccess *StopWhere,
- SmallVectorImpl<ListIndex> &PausedSearches,
- SmallVectorImpl<ListIndex> &NewPaused,
- SmallVectorImpl<TerminatedPath> &Terminated) {
- assert(!PausedSearches.empty() && "No searches to continue?");
-
- // BFS vs DFS really doesn't make a difference here, so just do a DFS with
- // PausedSearches as our stack.
- while (!PausedSearches.empty()) {
- ListIndex PathIndex = PausedSearches.pop_back_val();
- DefPath &Node = Paths[PathIndex];
-
- // If we've already visited this path with this MemoryLocation, we don't
- // need to do so again.
- //
-      // NOTE: Because we simply drop these paths on the ground, caching
-      // behavior is sporadic. e.g. given a diamond:
- // A
- // B C
- // D
- //
- // ...If we walk D, B, A, C, we'll only cache the result of phi
- // optimization for A, B, and D; C will be skipped because it dies here.
- // This arguably isn't the worst thing ever, since:
- // - We generally query things in a top-down order, so if we got below D
- // without needing cache entries for {C, MemLoc}, then chances are
- // that those cache entries would end up ultimately unused.
- // - We still cache things for A, so C only needs to walk up a bit.
- // If this behavior becomes problematic, we can fix without a ton of extra
- // work.
- if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
- continue;
-
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
- if (Res.IsKnownClobber) {
- assert(Res.Result != StopWhere || Res.FromCache);
- // If this wasn't a cache hit, we hit a clobber when walking. That's a
- // failure.
- TerminatedPath Term{Res.Result, PathIndex};
- if (!Res.FromCache || !MSSA.dominates(Res.Result, StopWhere))
- return Term;
-
- // Otherwise, it's a valid thing to potentially optimize to.
- Terminated.push_back(Term);
- continue;
- }
-
- if (Res.Result == StopWhere) {
- // We've hit our target. Save this path off for if we want to continue
- // walking.
- NewPaused.push_back(PathIndex);
- continue;
- }
-
- assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
- addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex);
- }
-
- return None;
- }
-
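-  /// Iterator over a chain of DefPaths, following each node's Previous link
-  /// back toward the start of the search.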
- template <typename T, typename Walker>
- struct generic_def_path_iterator
- : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
- std::forward_iterator_tag, T *> {
- generic_def_path_iterator() : W(nullptr), N(None) {}
- generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
-
- T &operator*() const { return curNode(); }
-
- generic_def_path_iterator &operator++() {
- N = curNode().Previous;
- return *this;
- }
-
- bool operator==(const generic_def_path_iterator &O) const {
- if (N.hasValue() != O.N.hasValue())
- return false;
- return !N.hasValue() || *N == *O.N;
- }
-
- private:
- T &curNode() const { return W->Paths[*N]; }
-
- Walker *W;
- Optional<ListIndex> N;
- };
-
- using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
- using const_def_path_iterator =
- generic_def_path_iterator<const DefPath, const ClobberWalker>;
-
- iterator_range<def_path_iterator> def_path(ListIndex From) {
- return make_range(def_path_iterator(this, From), def_path_iterator());
- }
-
- iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
- return make_range(const_def_path_iterator(this, From),
- const_def_path_iterator());
- }
-
- struct OptznResult {
- /// The path that contains our result.
- TerminatedPath PrimaryClobber;
- /// The paths that we can legally cache back from, but that aren't
- /// necessarily the result of the Phi optimization.
- SmallVector<TerminatedPath, 4> OtherClobbers;
- };
-
- ListIndex defPathIndex(const DefPath &N) const {
- // The assert looks nicer if we don't need to do &N
- const DefPath *NP = &N;
- assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
- "Out of bounds DefPath!");
- return NP - &Paths.front();
- }
-
- /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
- /// that act as legal clobbers. Note that this won't return *all* clobbers.
- ///
- /// Phi optimization algorithm tl;dr:
- /// - Find the earliest def/phi, A, we can optimize to
- /// - Find if all paths from the starting memory access ultimately reach A
- /// - If not, optimization isn't possible.
- /// - Otherwise, walk from A to another clobber or phi, A'.
- /// - If A' is a def, we're done.
- /// - If A' is a phi, try to optimize it.
- ///
- /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
- /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
- OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
- const MemoryLocation &Loc) {
- assert(Paths.empty() && VisitedPhis.empty() &&
- "Reset the optimization state.");
-
- Paths.emplace_back(Loc, Start, Phi, None);
- // Stores how many "valid" optimization nodes we had prior to calling
- // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
- auto PriorPathsSize = Paths.size();
-
- SmallVector<ListIndex, 16> PausedSearches;
- SmallVector<ListIndex, 8> NewPaused;
- SmallVector<TerminatedPath, 4> TerminatedPaths;
-
- addSearches(Phi, PausedSearches, 0);
-
- // Moves the TerminatedPath with the "most dominated" Clobber to the end of
- // Paths.
- auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
- assert(!Paths.empty() && "Need a path to move");
- auto Dom = Paths.begin();
- for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I)
- if (!MSSA.dominates(I->Clobber, Dom->Clobber))
- Dom = I;
- auto Last = Paths.end() - 1;
- if (Last != Dom)
- std::iter_swap(Last, Dom);
- };
-
- MemoryPhi *Current = Phi;
-    while (true) {
- assert(!MSSA.isLiveOnEntryDef(Current) &&
- "liveOnEntry wasn't treated as a clobber?");
-
- MemoryAccess *Target = getWalkTarget(Current);
- // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
- // optimization for the prior phi.
- assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
- return MSSA.dominates(P.Clobber, Target);
- }));
-
- // FIXME: This is broken, because the Blocker may be reported to be
- // liveOnEntry, and we'll happily wait for that to disappear (read: never)
- // For the moment, this is fine, since we do nothing with blocker info.
- if (Optional<TerminatedPath> Blocker = getBlockingAccess(
- Target, PausedSearches, NewPaused, TerminatedPaths)) {
- // Cache our work on the blocking node, since we know that's correct.
- cacheDefPath(Paths[Blocker->LastNode], Blocker->Clobber);
-
- // Find the node we started at. We can't search based on N->Last, since
- // we may have gone around a loop with a different MemoryLocation.
- auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) {
- return defPathIndex(N) < PriorPathsSize;
- });
- assert(Iter != def_path_iterator());
-
- DefPath &CurNode = *Iter;
- assert(CurNode.Last == Current);
-
- // Two things:
- // A. We can't reliably cache all of NewPaused back. Consider a case
- // where we have two paths in NewPaused; one of which can't optimize
- // above this phi, whereas the other can. If we cache the second path
- // back, we'll end up with suboptimal cache entries. We can handle
- // cases like this a bit better when we either try to find all
- // clobbers that block phi optimization, or when our cache starts
- // supporting unfinished searches.
- // B. We can't reliably cache TerminatedPaths back here without doing
- // extra checks; consider a case like:
- // T
- // / \
- // D C
- // \ /
- // S
- // Where T is our target, C is a node with a clobber on it, D is a
- // diamond (with a clobber *only* on the left or right node, N), and
- // S is our start. Say we walk to D, through the node opposite N
- // (read: ignoring the clobber), and see a cache entry in the top
- // node of D. That cache entry gets put into TerminatedPaths. We then
- // walk up to C (N is later in our worklist), find the clobber, and
- // quit. If we append TerminatedPaths to OtherClobbers, we'll cache
- // the bottom part of D to the cached clobber, ignoring the clobber
- // in N. Again, this problem goes away if we start tracking all
- // blockers for a given phi optimization.
- TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)};
- return {Result, {}};
- }
-
- // If there's nothing left to search, then all paths led to valid clobbers
- // that we got from our cache; pick the nearest to the start, and allow
- // the rest to be cached back.
- if (NewPaused.empty()) {
- MoveDominatedPathToEnd(TerminatedPaths);
- TerminatedPath Result = TerminatedPaths.pop_back_val();
- return {Result, std::move(TerminatedPaths)};
- }
-
- MemoryAccess *DefChainEnd = nullptr;
- SmallVector<TerminatedPath, 4> Clobbers;
- for (ListIndex Paused : NewPaused) {
- UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
- if (WR.IsKnownClobber)
- Clobbers.push_back({WR.Result, Paused});
- else
- // Micro-opt: If we hit the end of the chain, save it.
- DefChainEnd = WR.Result;
- }
-
- if (!TerminatedPaths.empty()) {
- // If we couldn't find the dominating phi/liveOnEntry in the above loop,
- // do it now.
- if (!DefChainEnd)
- for (MemoryAccess *MA : def_chain(Target))
- DefChainEnd = MA;
-
- // If any of the terminated paths don't dominate the phi we'll try to
- // optimize, we need to figure out what they are and quit.
- const BasicBlock *ChainBB = DefChainEnd->getBlock();
- for (const TerminatedPath &TP : TerminatedPaths) {
- // Because we know that DefChainEnd is as "high" as we can go, we
- // don't need local dominance checks; BB dominance is sufficient.
- if (DT.dominates(ChainBB, TP.Clobber->getBlock()))
- Clobbers.push_back(TP);
- }
- }
-
- // If we have clobbers in the def chain, find the one closest to Current
- // and quit.
- if (!Clobbers.empty()) {
- MoveDominatedPathToEnd(Clobbers);
- TerminatedPath Result = Clobbers.pop_back_val();
- return {Result, std::move(Clobbers)};
- }
-
- assert(all_of(NewPaused,
- [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
-
- // Because liveOnEntry is a clobber, this must be a phi.
- auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd);
-
- PriorPathsSize = Paths.size();
- PausedSearches.clear();
- for (ListIndex I : NewPaused)
- addSearches(DefChainPhi, PausedSearches, I);
- NewPaused.clear();
-
- Current = DefChainPhi;
- }
- }
-
- /// Caches everything in an OptznResult.
- void cacheOptResult(const OptznResult &R) {
- if (R.OtherClobbers.empty()) {
- // If we're not going to be caching OtherClobbers, don't bother with
- // marking visited/etc.
- for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode))
- cacheDefPath(N, R.PrimaryClobber.Clobber);
- return;
- }
-
- // PrimaryClobber is our answer. If we can cache anything back, we need to
- // stop caching when we visit PrimaryClobber.
- SmallBitVector Visited(Paths.size());
- for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) {
- Visited[defPathIndex(N)] = true;
- cacheDefPath(N, R.PrimaryClobber.Clobber);
- }
-
- for (const TerminatedPath &P : R.OtherClobbers) {
- for (const DefPath &N : const_def_path(P.LastNode)) {
- ListIndex NIndex = defPathIndex(N);
- if (Visited[NIndex])
- break;
- Visited[NIndex] = true;
- cacheDefPath(N, P.Clobber);
- }
- }
- }
-
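-  /// Check that every secondary clobber in \p R dominates the primary clobber
-  /// we are about to return.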
- void verifyOptResult(const OptznResult &R) const {
- assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
- return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
- }));
- }
-
- void resetPhiOptznState() {
- Paths.clear();
- VisitedPhis.clear();
- }
-
-public:
- ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT,
- WalkerCache &WC)
- : MSSA(MSSA), AA(AA), DT(DT), WC(WC), UseCache(true) {}
-
- void reset() { WalkTargetCache.clear(); }
-
- /// Finds the nearest clobber for the given query, optimizing phis if
- /// possible.
- MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
- bool UseWalkerCache = true) {
- setUseCache(UseWalkerCache);
- Query = &Q;
-
- MemoryAccess *Current = Start;
- // This walker pretends uses don't exist. If we're handed one, silently grab
- // its def. (This has the nice side-effect of ensuring we never cache uses)
- if (auto *MU = dyn_cast<MemoryUse>(Start))
- Current = MU->getDefiningAccess();
-
- DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
-    // Fast path for the overwhelmingly common case (no phi optimization
-    // necessary).
- UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
- MemoryAccess *Result;
- if (WalkResult.IsKnownClobber) {
- cacheDefPath(FirstDesc, WalkResult.Result);
- Result = WalkResult.Result;
- } else {
- OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
- Current, Q.StartingLoc);
- verifyOptResult(OptRes);
- cacheOptResult(OptRes);
- resetPhiOptznState();
- Result = OptRes.PrimaryClobber.Clobber;
- }
-
-#ifdef EXPENSIVE_CHECKS
- checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
-#endif
- return Result;
- }
-
- void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
-};
-
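-/// State for the iterative dominator-tree walk performed by renamePass: the
-/// current dominator-tree node, the next child to visit, and the incoming
-/// MemoryAccess for that subtree.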
-struct RenamePassData {
- DomTreeNode *DTN;
- DomTreeNode::const_iterator ChildIt;
- MemoryAccess *IncomingVal;
-
- RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
- MemoryAccess *M)
- : DTN(D), ChildIt(It), IncomingVal(M) {}
- void swap(RenamePassData &RHS) {
- std::swap(DTN, RHS.DTN);
- std::swap(ChildIt, RHS.ChildIt);
- std::swap(IncomingVal, RHS.IncomingVal);
- }
-};
-} // anonymous namespace
-
-namespace llvm {
-/// \brief A MemorySSAWalker that does AA walks and caching of lookups to
-/// disambiguate accesses.
-///
-/// FIXME: The current implementation of this can take quadratic space in rare
-/// cases. This can be fixed, but it is something to note until it is fixed.
-///
-/// In order to trigger this behavior, you need to store to N distinct locations
-/// (that AA can prove don't alias), perform M stores to other memory
-/// locations that AA can prove don't alias any of the initial N locations, and
-/// then load from all of the N locations. In this case, we insert M cache
-/// entries for each of the N loads.
-///
-/// For example:
-/// define i32 @foo() {
-/// %a = alloca i32, align 4
-/// %b = alloca i32, align 4
-/// store i32 0, i32* %a, align 4
-/// store i32 0, i32* %b, align 4
-///
-/// ; Insert M stores to other memory that doesn't alias %a or %b here
-///
-/// %c = load i32, i32* %a, align 4 ; Caches M entries in
-/// ; CachedUpwardsClobberingAccess for the
-/// ; MemoryLocation %a
-/// %d = load i32, i32* %b, align 4 ; Caches M entries in
-/// ; CachedUpwardsClobberingAccess for the
-/// ; MemoryLocation %b
-///
-/// ; For completeness' sake, loading %a or %b again would not cache *another*
-/// ; M entries.
-/// %r = add i32 %c, %d
-/// ret i32 %r
-/// }
-class MemorySSA::CachingWalker final : public MemorySSAWalker {
- WalkerCache Cache;
- ClobberWalker Walker;
- bool AutoResetWalker;
-
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
- void verifyRemoved(MemoryAccess *);
-
-public:
- CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
- ~CachingWalker() override;
-
- using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- const MemoryLocation &) override;
- void invalidateInfo(MemoryAccess *) override;
-
- /// Whether we call resetClobberWalker() after each time we *actually* walk to
- /// answer a clobber query.
- void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }
-
- /// Drop the walker's persistent data structures. At the moment, this means
- /// "drop the walker's cache of BasicBlocks ->
- /// earliest-MemoryAccess-we-can-optimize-to". This is necessary if we're
- /// going to have DT updates, if we remove MemoryAccesses, etc.
- void resetClobberWalker() { Walker.reset(); }
-
- void verify(const MemorySSA *MSSA) override {
- MemorySSAWalker::verify(MSSA);
- Walker.verify(MSSA);
- }
-};
-
-/// \brief Rename a single basic block into MemorySSA form.
-/// Uses the standard SSA renaming algorithm.
-/// \returns The new incoming value.
-MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
- MemoryAccess *IncomingVal) {
- auto It = PerBlockAccesses.find(BB);
- // Skip most processing if the list is empty.
- if (It != PerBlockAccesses.end()) {
- AccessList *Accesses = It->second.get();
- for (MemoryAccess &L : *Accesses) {
- if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
- if (MUD->getDefiningAccess() == nullptr)
- MUD->setDefiningAccess(IncomingVal);
- if (isa<MemoryDef>(&L))
- IncomingVal = &L;
- } else {
- IncomingVal = &L;
- }
- }
- }
-
- // Pass through values to our successors
- for (const BasicBlock *S : successors(BB)) {
- auto It = PerBlockAccesses.find(S);
- // Rename the phi nodes in our successor block
- if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
- continue;
- AccessList *Accesses = It->second.get();
- auto *Phi = cast<MemoryPhi>(&Accesses->front());
- Phi->addIncoming(IncomingVal, BB);
- }
-
- return IncomingVal;
-}
-
-/// \brief This is the standard SSA renaming algorithm.
-///
-/// We walk the dominator tree in preorder, renaming accesses, and then filling
-/// in phi nodes in our successors.
-void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
- SmallPtrSet<BasicBlock *, 16> &Visited) {
- SmallVector<RenamePassData, 32> WorkStack;
- IncomingVal = renameBlock(Root->getBlock(), IncomingVal);
- WorkStack.push_back({Root, Root->begin(), IncomingVal});
- Visited.insert(Root->getBlock());
-
- while (!WorkStack.empty()) {
- DomTreeNode *Node = WorkStack.back().DTN;
- DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
- IncomingVal = WorkStack.back().IncomingVal;
-
- if (ChildIt == Node->end()) {
- WorkStack.pop_back();
- } else {
- DomTreeNode *Child = *ChildIt;
- ++WorkStack.back().ChildIt;
- BasicBlock *BB = Child->getBlock();
- Visited.insert(BB);
- IncomingVal = renameBlock(BB, IncomingVal);
- WorkStack.push_back({Child, Child->begin(), IncomingVal});
- }
- }
-}
-
-/// \brief Compute dominator levels, used by the phi insertion algorithm above.
-void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) {
- for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode());
- DFI != DFE; ++DFI)
- DomLevels[*DFI] = DFI.getPathLength() - 1;
-}
-
-/// \brief This handles unreachable block accesses by deleting phi nodes in
-/// unreachable blocks, and marking all other unreachable MemoryAccess's as
-/// being uses of the live on entry definition.
-void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
- assert(!DT->isReachableFromEntry(BB) &&
- "Reachable block found while handling unreachable blocks");
-
- // Make sure phi nodes in our reachable successors end up with a
- // LiveOnEntryDef for our incoming edge, even though our block is forward
- // unreachable. We could just disconnect these blocks from the CFG fully,
- // but we do not right now.
- for (const BasicBlock *S : successors(BB)) {
- if (!DT->isReachableFromEntry(S))
- continue;
- auto It = PerBlockAccesses.find(S);
- // Rename the phi nodes in our successor block
- if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
- continue;
- AccessList *Accesses = It->second.get();
- auto *Phi = cast<MemoryPhi>(&Accesses->front());
- Phi->addIncoming(LiveOnEntryDef.get(), BB);
- }
-
- auto It = PerBlockAccesses.find(BB);
- if (It == PerBlockAccesses.end())
- return;
-
- auto &Accesses = It->second;
- for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
- auto Next = std::next(AI);
-    // Repoint uses and defs at the live-on-entry def; phi nodes are simply
-    // erased.
- if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
- UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
- else
- Accesses->erase(AI);
- AI = Next;
- }
-}
-
-MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
- : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(INVALID_MEMORYACCESS_ID) {
- buildMemorySSA();
-}
-
-MemorySSA::~MemorySSA() {
- // Drop all our references
- for (const auto &Pair : PerBlockAccesses)
- for (MemoryAccess &MA : *Pair.second)
- MA.dropAllReferences();
-}
-
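-/// Get the access list for \p BB, creating an empty list if none exists yet.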
-MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
- auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
-
- if (Res.second)
- Res.first->second = make_unique<AccessList>();
- return Res.first->second.get();
-}
-
-/// This class is a batch walker of all MemoryUse's in the program, and points
-/// their defining access at the thing that actually clobbers them. Because it
-/// is a batch walker that touches everything, it does not operate like the
-/// other walkers. This walker is basically performing a top-down SSA renaming
-/// pass, where the version stack is used as the cache. This enables it to be
-/// significantly more time and memory efficient than using the regular walker,
-/// which is walking bottom-up.
-class MemorySSA::OptimizeUses {
-public:
- OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
- DominatorTree *DT)
- : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
- Walker = MSSA->getWalker();
- }
-
- void optimizeUses();
-
-private:
- /// This represents where a given memorylocation is in the stack.
- struct MemlocStackInfo {
- // This essentially is keeping track of versions of the stack. Whenever
- // the stack changes due to pushes or pops, these versions increase.
- unsigned long StackEpoch;
- unsigned long PopEpoch;
- // This is the lower bound of places on the stack to check. It is equal to
- // the place the last stack walk ended.
-    // Note: Correctness depends on this being initialized to 0, which DenseMap
-    // does by default.
- unsigned long LowerBound;
- const BasicBlock *LowerBoundBlock;
- // This is where the last walk for this memory location ended.
- unsigned long LastKill;
- bool LastKillValid;
- };
- void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
- SmallVectorImpl<MemoryAccess *> &,
- DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
- MemorySSA *MSSA;
- MemorySSAWalker *Walker;
- AliasAnalysis *AA;
- DominatorTree *DT;
-};
-
-/// Optimize the uses in a given block. This is basically the SSA renaming
-/// algorithm, with one caveat: we are able to use a single stack for all
-/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
-/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
-/// going to be some position in that stack of possible ones.
-///
-/// For each MemoryLocation we track the stack positions it still needs to
-/// check and where its last walk ended, because we only want to check the
-/// things that have changed since last time. The same MemoryLocation should
-/// get clobbered by the same store (getModRefInfo does not use invariantness
-/// or similar properties, and if it starts to, we can extend MemoryLocOrCall
-/// to include the relevant data).
-void MemorySSA::OptimizeUses::optimizeUsesInBlock(
- const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
- SmallVectorImpl<MemoryAccess *> &VersionStack,
- DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
-
-  // If the block has no accesses, there is nothing to do.
- MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
- if (Accesses == nullptr)
- return;
-
- // Pop everything that doesn't dominate the current block off the stack,
- // increment the PopEpoch to account for this.
- while (!VersionStack.empty()) {
- BasicBlock *BackBlock = VersionStack.back()->getBlock();
- if (DT->dominates(BackBlock, BB))
- break;
- while (VersionStack.back()->getBlock() == BackBlock)
- VersionStack.pop_back();
- ++PopEpoch;
- }
- for (MemoryAccess &MA : *Accesses) {
- auto *MU = dyn_cast<MemoryUse>(&MA);
- if (!MU) {
- VersionStack.push_back(&MA);
- ++StackEpoch;
- continue;
- }
-
- if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
- MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
- continue;
- }
-
- MemoryLocOrCall UseMLOC(MU);
- auto &LocInfo = LocStackInfo[UseMLOC];
-    // If the pop epoch changed, it means we've removed stuff from the top of
-    // the stack due to changing blocks. We may have to reset the lower bound
-    // or last kill info.
- if (LocInfo.PopEpoch != PopEpoch) {
- LocInfo.PopEpoch = PopEpoch;
- LocInfo.StackEpoch = StackEpoch;
- // If the lower bound was in something that no longer dominates us, we
- // have to reset it.
- // We can't simply track stack size, because the stack may have had
- // pushes/pops in the meantime.
-      // XXX: This is non-optimal, but is only slower in cases with heavily
-      // branching dominator trees. Getting the optimal number of queries would
-      // require making LowerBound and LastKill a per-location stack, and
-      // popping it until the top of that stack dominates us. This does not
-      // seem worth it at the moment. A much cheaper optimization would be to
-      // always explore the deepest branch of the dominator tree first, which
-      // would guarantee that this resets on the smallest set of blocks.
- if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
- !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
- // Reset the lower bound of things to check.
- // TODO: Some day we should be able to reset to last kill, rather than
- // 0.
- LocInfo.LowerBound = 0;
- LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
- LocInfo.LastKillValid = false;
- }
- } else if (LocInfo.StackEpoch != StackEpoch) {
- // If all that has changed is the StackEpoch, we only have to check the
- // new things on the stack, because we've checked everything before. In
- // this case, the lower bound of things to check remains the same.
- LocInfo.PopEpoch = PopEpoch;
- LocInfo.StackEpoch = StackEpoch;
- }
- if (!LocInfo.LastKillValid) {
- LocInfo.LastKill = VersionStack.size() - 1;
- LocInfo.LastKillValid = true;
- }
-
-    // At this point, LastKill and LowerBound should both have been corrected
-    // to be in bounds.
- assert(LocInfo.LowerBound < VersionStack.size() &&
- "Lower bound out of range");
- assert(LocInfo.LastKill < VersionStack.size() &&
- "Last kill info out of range");
- // In any case, the new upper bound is the top of the stack.
- unsigned long UpperBound = VersionStack.size() - 1;
-
- if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
- DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
- << *(MU->getMemoryInst()) << ")"
- << " because there are " << UpperBound - LocInfo.LowerBound
- << " stores to disambiguate\n");
- // Because we did not walk, LastKill is no longer valid, as this may
- // have been a kill.
- LocInfo.LastKillValid = false;
- continue;
- }
- bool FoundClobberResult = false;
- while (UpperBound > LocInfo.LowerBound) {
- if (isa<MemoryPhi>(VersionStack[UpperBound])) {
- // For phis, use the walker, see where we ended up, go there
- Instruction *UseInst = MU->getMemoryInst();
- MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
- // We are guaranteed to find it or something is wrong
- while (VersionStack[UpperBound] != Result) {
- assert(UpperBound != 0);
- --UpperBound;
- }
- FoundClobberResult = true;
- break;
- }
-
- MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
- // If the lifetime of the pointer ends at this instruction, it's live on
- // entry.
- if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
- // Reset UpperBound to liveOnEntryDef's place in the stack
- UpperBound = 0;
- FoundClobberResult = true;
- break;
- }
- if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) {
- FoundClobberResult = true;
- break;
- }
- --UpperBound;
- }
-    // At the end of this loop, UpperBound is either a clobber or the lower
-    // bound. PHI walking may cause it to be < LowerBound, and in fact,
-    // < LastKill.
- if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
- MU->setDefiningAccess(VersionStack[UpperBound], true);
- // We were last killed now by where we got to
- LocInfo.LastKill = UpperBound;
- } else {
- // Otherwise, we checked all the new ones, and now we know we can get to
- // LastKill.
- MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
- }
- LocInfo.LowerBound = VersionStack.size() - 1;
- LocInfo.LowerBoundBlock = BB;
- }
-}
-
-/// Optimize uses to point to their actual clobbering definitions.
-void MemorySSA::OptimizeUses::optimizeUses() {
-
- // We perform a non-recursive top-down dominator tree walk
- struct StackInfo {
- const DomTreeNode *Node;
- DomTreeNode::const_iterator Iter;
- };
-
- SmallVector<MemoryAccess *, 16> VersionStack;
- SmallVector<StackInfo, 16> DomTreeWorklist;
- DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
- VersionStack.push_back(MSSA->getLiveOnEntryDef());
-
- unsigned long StackEpoch = 1;
- unsigned long PopEpoch = 1;
- for (const auto *DomNode : depth_first(DT->getRootNode()))
- optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
- LocStackInfo);
-}
-
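-/// \brief Place MemoryPhis on the iterated dominance frontier of the blocks
-/// that contain MemoryDefs, processing blocks in a deterministic (numbered)
-/// order.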
-void MemorySSA::placePHINodes(
- const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
- const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) {
- // Determine where our MemoryPhi's should go
- ForwardIDFCalculator IDFs(*DT);
- IDFs.setDefiningBlocks(DefiningBlocks);
- SmallVector<BasicBlock *, 32> IDFBlocks;
- IDFs.calculate(IDFBlocks);
-
- std::sort(IDFBlocks.begin(), IDFBlocks.end(),
- [&BBNumbers](const BasicBlock *A, const BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
-
- // Now place MemoryPhi nodes.
- for (auto &BB : IDFBlocks) {
- // Insert phi node
- AccessList *Accesses = getOrCreateAccessList(BB);
- MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess[BB] = Phi;
-    // Phis are always placed at the front of the block.
- Accesses->push_front(Phi);
- }
-}
-
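-/// \brief Build MemorySSA for the function: create the live-on-entry def and
-/// the per-block access lists, place MemoryPhis, run SSA renaming, optimize
-/// uses, and mark accesses in unreachable blocks as live on entry.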
-void MemorySSA::buildMemorySSA() {
- // We create an access to represent "live on entry", for things like
- // arguments or users of globals, where the memory they use is defined before
- // the beginning of the function. We do not actually insert it into the IR.
- // We do not define a live on exit for the immediate uses, and thus our
- // semantics do *not* imply that something with no immediate uses can simply
- // be removed.
- BasicBlock &StartingPoint = F.getEntryBlock();
- LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
- &StartingPoint, NextID++);
- DenseMap<const BasicBlock *, unsigned int> BBNumbers;
- unsigned NextBBNum = 0;
-
- // We maintain lists of memory accesses per-block, trading memory for time. We
- // could just look up the memory access for every possible instruction in the
- // stream.
- SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
- SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
- // Go through each block, figure out where defs occur, and chain together all
- // the accesses.
- for (BasicBlock &B : F) {
- BBNumbers[&B] = NextBBNum++;
- bool InsertIntoDef = false;
- AccessList *Accesses = nullptr;
- for (Instruction &I : B) {
- MemoryUseOrDef *MUD = createNewAccess(&I);
- if (!MUD)
- continue;
- InsertIntoDef |= isa<MemoryDef>(MUD);
-
- if (!Accesses)
- Accesses = getOrCreateAccessList(&B);
- Accesses->push_back(MUD);
- }
- if (InsertIntoDef)
- DefiningBlocks.insert(&B);
- if (Accesses)
- DefUseBlocks.insert(&B);
- }
- placePHINodes(DefiningBlocks, BBNumbers);
-
- // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
- // filled in with all blocks.
- SmallPtrSet<BasicBlock *, 16> Visited;
- renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
-
- CachingWalker *Walker = getWalkerImpl();
-
- // We're doing a batch of updates; don't drop useful caches between them.
- Walker->setAutoResetWalker(false);
- OptimizeUses(this, Walker, AA, DT).optimizeUses();
- Walker->setAutoResetWalker(true);
- Walker->resetClobberWalker();
-
- // Mark the uses in unreachable blocks as live on entry, so that they go
- // somewhere.
- for (auto &BB : F)
- if (!Visited.count(&BB))
- markUnreachableAsLiveOnEntry(&BB);
-}
-
-MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
-
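-/// Lazily construct the CachingWalker the first time it is requested.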
-MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
- if (Walker)
- return Walker.get();
-
- Walker = make_unique<CachingWalker>(this, AA, DT);
- return Walker.get();
-}
-
-MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
- assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
- AccessList *Accesses = getOrCreateAccessList(BB);
- MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess[BB] = Phi;
-  // Phis are always placed at the front of the block.
- Accesses->push_front(Phi);
- BlockNumberingValid.erase(BB);
- return Phi;
-}
-
-MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
- MemoryAccess *Definition) {
- assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I);
- assert(
- NewAccess != nullptr &&
- "Tried to create a memory access for a non-memory touching instruction");
- NewAccess->setDefiningAccess(Definition);
- return NewAccess;
-}
-
-MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
- MemoryAccess *Definition,
- const BasicBlock *BB,
- InsertionPlace Point) {
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(BB);
- if (Point == Beginning) {
- // It goes after any phi nodes
- auto AI = find_if(
- *Accesses, [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
-
- Accesses->insert(AI, NewAccess);
- } else {
- Accesses->push_back(NewAccess);
- }
- BlockNumberingValid.erase(BB);
- return NewAccess;
-}
-
-MemoryUseOrDef *MemorySSA::createMemoryAccessBefore(Instruction *I,
- MemoryAccess *Definition,
- MemoryUseOrDef *InsertPt) {
- assert(I->getParent() == InsertPt->getBlock() &&
- "New and old access must be in the same block");
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
- Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
- BlockNumberingValid.erase(InsertPt->getBlock());
- return NewAccess;
-}
-
-MemoryUseOrDef *MemorySSA::createMemoryAccessAfter(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt) {
- assert(I->getParent() == InsertPt->getBlock() &&
- "New and old access must be in the same block");
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
- Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
- BlockNumberingValid.erase(InsertPt->getBlock());
- return NewAccess;
-}
-
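-/// \brief Move \p What so that it sits immediately above \p Where in \p
-/// Where's access list, rewiring the defining accesses when \p What is a
-/// MemoryDef.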
-void MemorySSA::spliceMemoryAccessAbove(MemoryDef *Where,
- MemoryUseOrDef *What) {
- assert(What != getLiveOnEntryDef() &&
- Where != getLiveOnEntryDef() && "Can't splice (above) LOE.");
- assert(dominates(Where, What) && "Only upwards splices are permitted.");
-
- if (Where == What)
- return;
- if (isa<MemoryDef>(What)) {
- // TODO: possibly use removeMemoryAccess' more efficient RAUW
- What->replaceAllUsesWith(What->getDefiningAccess());
- What->setDefiningAccess(Where->getDefiningAccess());
- Where->setDefiningAccess(What);
- }
- AccessList *Src = getWritableBlockAccesses(What->getBlock());
- AccessList *Dest = getWritableBlockAccesses(Where->getBlock());
- Dest->splice(AccessList::iterator(Where), *Src, What);
-
- BlockNumberingValid.erase(What->getBlock());
- if (What->getBlock() != Where->getBlock())
- BlockNumberingValid.erase(Where->getBlock());
-}
-
-/// \brief Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
- // The assume intrinsic has a control dependency which we model by claiming
- // that it writes arbitrarily. Ignore that fake memory dependency here.
- // FIXME: Replace this special casing with a more accurate modelling of
- // assume's control dependency.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- return nullptr;
-
-  // Find out what effect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I);
- bool Def = bool(ModRef & MRI_Mod);
- bool Use = bool(ModRef & MRI_Ref);
-
-  // It's possible for an instruction to not modify memory at all. During
-  // construction, we ignore such instructions.
- if (!Def && !Use)
- return nullptr;
-
- assert((Def || Use) &&
- "Trying to create a memory access with a non-memory instruction");
-
- MemoryUseOrDef *MUD;
- if (Def)
- MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
- else
- MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
- ValueToMemoryAccess[I] = MUD;
- return MUD;
-}
-
-MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
- enum InsertionPlace Where) {
- // Handle the initial case
- if (Where == Beginning)
- // The only thing that could define us at the beginning is a phi node
- if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
- return Phi;
-
- DomTreeNode *CurrNode = DT->getNode(UseBlock);
- // Need to be defined by our dominator
- if (Where == Beginning)
- CurrNode = CurrNode->getIDom();
- Where = End;
- while (CurrNode) {
- auto It = PerBlockAccesses.find(CurrNode->getBlock());
- if (It != PerBlockAccesses.end()) {
- auto &Accesses = It->second;
- for (MemoryAccess &RA : reverse(*Accesses)) {
- if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
- return &RA;
- }
- }
- CurrNode = CurrNode->getIDom();
- }
- return LiveOnEntryDef.get();
-}
-
-/// \brief Returns true if \p Replacer dominates \p Replacee .
-bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
- const MemoryAccess *Replacee) const {
- if (isa<MemoryUseOrDef>(Replacee))
- return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
- const auto *MP = cast<MemoryPhi>(Replacee);
- // For a phi node, the use occurs in the predecessor block of the phi node.
-  // Since Replacee may occur multiple times in the phi node, we have to check
-  // each operand to ensure Replacer dominates each operand where Replacee
-  // occurs.
- for (const Use &Arg : MP->operands()) {
- if (Arg.get() != Replacee &&
- !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
- return false;
- }
- return true;
-}
-
-/// \brief If all incoming arguments of a MemoryPhi are the same access,
-/// return that access; otherwise return nullptr.
-static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
- MemoryAccess *MA = nullptr;
-
- for (auto &Arg : MP->operands()) {
- if (!MA)
- MA = cast<MemoryAccess>(Arg);
- else if (MA != Arg)
- return nullptr;
- }
- return MA;
-}
-
-/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
-///
-/// Because of the way the intrusive list and use lists work, it is important to
-/// do removal in the right order.
-void MemorySSA::removeFromLookups(MemoryAccess *MA) {
- assert(MA->use_empty() &&
- "Trying to remove memory access that still has uses");
- BlockNumbering.erase(MA);
- if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MUD->setDefiningAccess(nullptr);
- // Invalidate our walker's cache if necessary
- if (!isa<MemoryUse>(MA))
- Walker->invalidateInfo(MA);
-  // The call below to erase will destroy MA, so we can't change the order in
-  // which we do things here.
- Value *MemoryInst;
- if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
- MemoryInst = MUD->getMemoryInst();
- } else {
- MemoryInst = MA->getBlock();
- }
- auto VMA = ValueToMemoryAccess.find(MemoryInst);
- if (VMA->second == MA)
- ValueToMemoryAccess.erase(VMA);
-
- auto AccessIt = PerBlockAccesses.find(MA->getBlock());
- std::unique_ptr<AccessList> &Accesses = AccessIt->second;
- Accesses->erase(MA);
- if (Accesses->empty())
- PerBlockAccesses.erase(AccessIt);
-}
-
-void MemorySSA::removeMemoryAccess(MemoryAccess *MA) {
- assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def");
- // We can only delete phi nodes if they have no uses, or we can replace all
- // uses with a single definition.
- MemoryAccess *NewDefTarget = nullptr;
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
- // Note that it is sufficient to know that all edges of the phi node have
- // the same argument. If they do, by the definition of dominance frontiers
- // (which we used to place this phi), that argument must dominate this phi,
- // and thus, must dominate the phi's uses, and so we will not hit the assert
- // below.
- NewDefTarget = onlySingleValue(MP);
- assert((NewDefTarget || MP->use_empty()) &&
- "We can't delete this memory phi");
- } else {
- NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
- }
-
- // Re-point the uses at our defining access
- if (!MA->use_empty()) {
- // Reset optimized on users of this store, and reset the uses.
- // A few notes:
- // 1. This is a slightly modified version of RAUW to avoid walking the
- // uses twice here.
- // 2. If we wanted to be complete, we would have to reset the optimized
- // flags on users of phi nodes if doing the below makes a phi node have all
-    // the same arguments. Instead, we expect users to call removeMemoryAccess
-    // on such phi nodes themselves, because doing it here would be O(N^3).
- if (MA->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
- // Note: We assume MemorySSA is not used in metadata since it's not really
- // part of the IR.
-
- while (!MA->use_empty()) {
- Use &U = *MA->use_begin();
- if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser()))
- MU->resetOptimized();
- U.set(NewDefTarget);
- }
- }
-
-  // The call below to erase will destroy MA, so we can't change the order in
-  // which we do things here.
- removeFromLookups(MA);
-}
-
-void MemorySSA::print(raw_ostream &OS) const {
- MemorySSAAnnotatedWriter Writer(this);
- F.print(OS, &Writer);
-}
-
-void MemorySSA::dump() const {
- MemorySSAAnnotatedWriter Writer(this);
- F.print(dbgs(), &Writer);
-}
-
-void MemorySSA::verifyMemorySSA() const {
- verifyDefUses(F);
- verifyDomination(F);
- verifyOrdering(F);
- Walker->verify(this);
-}
-
-/// \brief Verify that the order and existence of MemoryAccesses matches the
-/// order and existence of memory affecting instructions.
-void MemorySSA::verifyOrdering(Function &F) const {
- // Walk all the blocks, comparing what the lookups think and what the access
- // lists think, as well as the order in the blocks vs the order in the access
- // lists.
- SmallVector<MemoryAccess *, 32> ActualAccesses;
- for (BasicBlock &B : F) {
- const AccessList *AL = getBlockAccesses(&B);
- MemoryAccess *Phi = getMemoryAccess(&B);
- if (Phi)
- ActualAccesses.push_back(Phi);
- for (Instruction &I : B) {
- MemoryAccess *MA = getMemoryAccess(&I);
- assert((!MA || AL) && "We have memory affecting instructions "
- "in this block but they are not in the "
- "access list");
- if (MA)
- ActualAccesses.push_back(MA);
- }
-    // Either we hit the assert above, we really have no accesses, or we have
-    // both accesses and an access list.
- if (!AL)
- continue;
- assert(AL->size() == ActualAccesses.size() &&
- "We don't have the same number of accesses in the block as on the "
- "access list");
- auto ALI = AL->begin();
- auto AAI = ActualAccesses.begin();
- while (ALI != AL->end() && AAI != ActualAccesses.end()) {
- assert(&*ALI == *AAI && "Not the same accesses in the same order");
- ++ALI;
- ++AAI;
- }
- ActualAccesses.clear();
- }
-}
-
-/// \brief Verify the domination properties of MemorySSA by checking that each
-/// definition dominates all of its uses.
-void MemorySSA::verifyDomination(Function &F) const {
-#ifndef NDEBUG
- for (BasicBlock &B : F) {
- // Phi nodes are attached to basic blocks
- if (MemoryPhi *MP = getMemoryAccess(&B))
- for (const Use &U : MP->uses())
-        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
-
- for (Instruction &I : B) {
- MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
- if (!MD)
- continue;
-
- for (const Use &U : MD->uses())
-        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
- }
- }
-#endif
-}
-
-/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
-/// appears in the use list of \p Def.
-void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
-#ifndef NDEBUG
-  // The live-on-entry use may cause us to get a null def here.
-  if (!Def)
-    assert(isLiveOnEntryDef(Use) &&
-           "Null def but use does not point to the live on entry def");
- else
- assert(is_contained(Def->users(), Use) &&
- "Did not find use in def's use list");
-#endif
-}
-
-/// \brief Verify the immediate use information, by walking all the memory
-/// accesses and verifying that, for each use, it appears in the
-/// appropriate def's use list
-void MemorySSA::verifyDefUses(Function &F) const {
- for (BasicBlock &B : F) {
- // Phi nodes are attached to basic blocks
- if (MemoryPhi *Phi = getMemoryAccess(&B)) {
- assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
- pred_begin(&B), pred_end(&B))) &&
- "Incomplete MemoryPhi Node");
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
- verifyUseInDefs(Phi->getIncomingValue(I), Phi);
- }
-
- for (Instruction &I : B) {
- if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
- verifyUseInDefs(MA->getDefiningAccess(), MA);
- }
- }
- }
-}
-
-MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
- return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
-}
-
-MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
- return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
-}
-
-/// Perform a local numbering on blocks so that instruction ordering can be
-/// determined in constant time.
-/// TODO: We currently just number in order. If we numbered by N, we could
-/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
-/// log2(N) sequences of mixed before and after) without needing to invalidate
-/// the numbering.
-void MemorySSA::renumberBlock(const BasicBlock *B) const {
- // The pre-increment ensures the numbers really start at 1.
- unsigned long CurrentNumber = 0;
- const AccessList *AL = getBlockAccesses(B);
- assert(AL != nullptr && "Asking to renumber an empty block");
- for (const auto &I : *AL)
- BlockNumbering[&I] = ++CurrentNumber;
- BlockNumberingValid.insert(B);
-}
-
-/// \brief Determine, for two memory accesses in the same block,
-/// whether \p Dominator dominates \p Dominatee.
-/// \returns True if \p Dominator dominates \p Dominatee.
-bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
- const MemoryAccess *Dominatee) const {
-
- const BasicBlock *DominatorBlock = Dominator->getBlock();
-
- assert((DominatorBlock == Dominatee->getBlock()) &&
- "Asking for local domination when accesses are in different blocks!");
- // A node dominates itself.
- if (Dominatee == Dominator)
- return true;
-
- // When Dominatee is defined on function entry, it is not dominated by another
- // memory access.
- if (isLiveOnEntryDef(Dominatee))
- return false;
-
- // When Dominator is defined on function entry, it dominates the other memory
- // access.
- if (isLiveOnEntryDef(Dominator))
- return true;
-
- if (!BlockNumberingValid.count(DominatorBlock))
- renumberBlock(DominatorBlock);
-
- unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
- // All numbers start with 1
- assert(DominatorNum != 0 && "Block was not numbered properly");
- unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
- assert(DominateeNum != 0 && "Block was not numbered properly");
- return DominatorNum < DominateeNum;
-}
-
-bool MemorySSA::dominates(const MemoryAccess *Dominator,
- const MemoryAccess *Dominatee) const {
- if (Dominator == Dominatee)
- return true;
-
- if (isLiveOnEntryDef(Dominatee))
- return false;
-
- if (Dominator->getBlock() != Dominatee->getBlock())
- return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
- return locallyDominates(Dominator, Dominatee);
-}
-
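-/// \brief Returns true if \p Dominator dominates the use \p Dominatee. For a
-/// MemoryPhi use, dominance is checked against the use's incoming block.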
-bool MemorySSA::dominates(const MemoryAccess *Dominator,
- const Use &Dominatee) const {
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
- BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
- // The def must dominate the incoming block of the phi.
- if (UseBB != Dominator->getBlock())
- return DT->dominates(Dominator->getBlock(), UseBB);
- // If the UseBB and the DefBB are the same, compare locally.
- return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
- }
- // If it's not a PHI node use, the normal dominates can already handle it.
- return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
-}
-
-const static char LiveOnEntryStr[] = "liveOnEntry";
-
-void MemoryDef::print(raw_ostream &OS) const {
- MemoryAccess *UO = getDefiningAccess();
-
- OS << getID() << " = MemoryDef(";
- if (UO && UO->getID())
- OS << UO->getID();
- else
- OS << LiveOnEntryStr;
- OS << ')';
-}
-
-void MemoryPhi::print(raw_ostream &OS) const {
- bool First = true;
- OS << getID() << " = MemoryPhi(";
- for (const auto &Op : operands()) {
- BasicBlock *BB = getIncomingBlock(Op);
- MemoryAccess *MA = cast<MemoryAccess>(Op);
- if (!First)
- OS << ',';
- else
- First = false;
-
- OS << '{';
- if (BB->hasName())
- OS << BB->getName();
- else
- BB->printAsOperand(OS, false);
- OS << ',';
- if (unsigned ID = MA->getID())
- OS << ID;
- else
- OS << LiveOnEntryStr;
- OS << '}';
- }
- OS << ')';
-}
-
-MemoryAccess::~MemoryAccess() {}
-
-void MemoryUse::print(raw_ostream &OS) const {
- MemoryAccess *UO = getDefiningAccess();
- OS << "MemoryUse(";
- if (UO && UO->getID())
- OS << UO->getID();
- else
- OS << LiveOnEntryStr;
- OS << ')';
-}
-
-void MemoryAccess::dump() const {
- print(dbgs());
- dbgs() << "\n";
-}
-
-char MemorySSAPrinterLegacyPass::ID = 0;
-
-MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
- initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
-}
-
-void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
-}
-
-bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
- auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSA.print(dbgs());
- if (VerifyMemorySSA)
- MSSA.verifyMemorySSA();
- return false;
-}
-
-AnalysisKey MemorySSAAnalysis::Key;
-
-MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &AA = AM.getResult<AAManager>(F);
- return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT));
-}
-
-PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- OS << "MemorySSA for function: " << F.getName() << "\n";
- AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);
-
- return PreservedAnalyses::all();
-}
-
-PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
-
- return PreservedAnalyses::all();
-}
-
-char MemorySSAWrapperPass::ID = 0;
-
-MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
- initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
-}
-
-void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
-
-void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequiredTransitive<AAResultsWrapperPass>();
-}
-
-bool MemorySSAWrapperPass::runOnFunction(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- MSSA.reset(new MemorySSA(F, &AA, &DT));
- return false;
-}
-
-void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
-
-void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
- MSSA->print(OS);
-}
-
-MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
-
-MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
- DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D, Cache), AutoResetWalker(true) {}
-
-MemorySSA::CachingWalker::~CachingWalker() {}
-
-void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
- // TODO: We can do much better cache invalidation with differently stored
- // caches. For now, for MemoryUses, we simply remove them
- // from the cache, and kill the entire call/non-call cache for everything
- // else. The problem is for phis or defs, currently we'd need to follow use
- // chains down and invalidate anything below us in the chain that currently
- // terminates at this access.
-
- // See if this is a MemoryUse, if so, just remove the cached info. MemoryUse
- // is by definition never a barrier, so nothing in the cache could point to
-  // this use. In that case, we only need to invalidate the info for the use
- // itself.
-
- if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
- UpwardsMemoryQuery Q(MU->getMemoryInst(), MU);
- Cache.remove(MU, Q.StartingLoc, Q.IsCall);
- MU->resetOptimized();
- } else {
- // If it is not a use, the best we can do right now is destroy the cache.
- Cache.clear();
- }
-
-#ifdef EXPENSIVE_CHECKS
- verifyRemoved(MA);
-#endif
-}
-
-/// \brief Walk the use-def chains starting at \p MA and find
-/// the MemoryAccess that actually clobbers Loc.
-///
-/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
-#ifdef EXPENSIVE_CHECKS
- MemoryAccess *NewNoCache =
- Walker.findClobber(StartingAccess, Q, /*UseWalkerCache=*/false);
- assert(NewNoCache == New && "Cache made us hand back a different result?");
-#endif
- if (AutoResetWalker)
- resetClobberWalker();
- return New;
-}
-
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
- if (isa<MemoryPhi>(StartingAccess))
- return StartingAccess;
-
- auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
- if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
- return StartingUseOrDef;
-
- Instruction *I = StartingUseOrDef->getMemoryInst();
-
- // Conservatively, fences are always clobbers, so don't perform the walk if we
- // hit a fence.
- if (!ImmutableCallSite(I) && I->isFenceLike())
- return StartingUseOrDef;
-
- UpwardsMemoryQuery Q;
- Q.OriginalAccess = StartingUseOrDef;
- Q.StartingLoc = Loc;
- Q.Inst = I;
- Q.IsCall = false;
-
- if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall))
- return CacheResult;
-
- // Unlike the other function, do not walk to the def of a def, because we are
- // handed something we already believe is the clobbering access.
- MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
- ? StartingUseOrDef->getDefiningAccess()
- : StartingUseOrDef;
-
- MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
- DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *StartingUseOrDef << "\n");
- DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *Clobber << "\n");
- return Clobber;
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
- // If this is a MemoryPhi, we can't do anything.
- if (!StartingAccess)
- return MA;
-
- // If this is an already optimized use or def, return the optimized result.
- // Note: Currently, we do not store the optimized def result because we'd need
- // a separate field, since we can't use it as the defining access.
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- if (MU->isOptimized())
- return MU->getDefiningAccess();
-
- const Instruction *I = StartingAccess->getMemoryInst();
- UpwardsMemoryQuery Q(I, StartingAccess);
-  // We can't sanely do anything with fences; they conservatively
- // clobber all memory, and have no locations to get pointers from to
- // try to disambiguate.
- if (!Q.IsCall && I->isFenceLike())
- return StartingAccess;
-
- if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall))
- return CacheResult;
-
- if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
- MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
- Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall);
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- MU->setDefiningAccess(LiveOnEntry, true);
- return LiveOnEntry;
- }
-
- // Start with the thing we already think clobbers this location
- MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
-
- // At this point, DefiningAccess may be the live on entry def.
- // If it is, we will not get a better result.
- if (MSSA->isLiveOnEntryDef(DefiningAccess))
- return DefiningAccess;
-
- MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
- DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *DefiningAccess << "\n");
- DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *Result << "\n");
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- MU->setDefiningAccess(Result, true);
-
- return Result;
-}
-
-// Verify that MA doesn't exist in any of the caches.
-void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
- assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache.");
-}
-
-MemoryAccess *
-DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
- return Use->getDefiningAccess();
- return MA;
-}
-
-MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, const MemoryLocation &) {
- if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
- return Use->getDefiningAccess();
- return StartingAccess;
-}
-} // namespace llvm
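The locallyDominates()/renumberBlock() pair removed above answers "does access A precede access B in the same block?" with a lazy per-block numbering: number the block's access list once, on first query, then compare two map lookups. A minimal standalone sketch of that idea follows; the LocalNumbering and Block types are illustrative stand-ins, not LLVM classes.

    // Sketch only: lazy per-block numbering for O(1) intra-block ordering
    // queries, mirroring renumberBlock()/locallyDominates() above.
    #include <cassert>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Block { std::vector<const void *> Items; }; // ordered "accesses"

    class LocalNumbering {
      mutable std::unordered_map<const void *, unsigned long> Numbers;
      mutable std::unordered_set<const Block *> Valid;

      void renumber(const Block &B) const {
        unsigned long N = 0; // pre-increment keeps numbers >= 1
        for (const void *Item : B.Items)
          Numbers[Item] = ++N;
        Valid.insert(&B);
      }

    public:
      // Returns true if A comes strictly before B inside block Blk.
      bool comesBefore(const Block &Blk, const void *A, const void *B) const {
        if (A == B)
          return false;
        if (!Valid.count(&Blk))
          renumber(Blk); // renumber at most once until the block changes
        assert(Numbers.count(A) && Numbers.count(B) && "items not in block");
        return Numbers.at(A) < Numbers.at(B);
      }
    };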
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c999bd008fef..481c6aa29c3a 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -67,6 +68,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -110,9 +112,15 @@ namespace {
}
// Rename all functions
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (auto &F : M) {
StringRef Name = F.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ TLI.getLibFunc(F, Tmp))
continue;
F.setName(renamer.newName());
@@ -139,8 +147,11 @@ namespace {
}
char MetaRenamer::ID = 0;
-INITIALIZE_PASS(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
+INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
//===----------------------------------------------------------------------===//
//
// MetaRenamer - Rename everything with metasyntactic names.
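For illustration, the guard the MetaRenamer hunk above adds could be factored into a small predicate. This helper is hypothetical, not part of the patch, and uses only the calls already shown in the hunk.

    // Hypothetical helper: should MetaRenamer rename this function?
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static bool shouldRename(const Function &F, const TargetLibraryInfo &TLI) {
      StringRef Name = F.getName();
      LibFunc Tmp;
      // Keep intrinsics, names using the '\1' mangling escape, and recognized
      // library functions; everything else gets a metasyntactic name.
      return !Name.startswith("llvm.") && !(!Name.empty() && Name[0] == 1) &&
             !TLI.getLibFunc(F, Tmp);
    }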
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 0d623df77a67..dbe42c201dd4 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -130,13 +130,25 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast))
return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->dump();
+ FuncOrBitcast->print(errs());
+ errs() << '\n';
std::string Err;
raw_string_ostream Stream(Err);
Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
report_fatal_error(Err);
}
+Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes) {
+ assert(!InitName.empty() && "Expected init function name");
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ InitName,
+ FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
+ AttributeList()));
+ F->setLinkage(Function::ExternalLinkage);
+ return F;
+}
+
std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
@@ -144,22 +156,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
+ Function *InitFunction =
+ declareSanitizerInitFunction(M, InitName, InitArgTypes);
Function *Ctor = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalValue::InternalLinkage, CtorName, &M);
BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
- Function *InitFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false),
- AttributeSet()));
- InitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(InitFunction, InitArgs);
if (!VersionCheckName.empty()) {
Function *VersionCheckFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
- AttributeSet()));
+ AttributeList()));
IRB.CreateCall(VersionCheckFunction, {});
}
return std::make_pair(Ctor, InitFunction);
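An instrumentation pass typically uses the helpers above roughly as sketched below; the "__tool_init" and "tool.module_ctor" names are placeholders for a hypothetical runtime, not a real sanitizer.

    // Sketch: build a module constructor that calls a runtime init function.
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"
    #include <tuple>
    using namespace llvm;

    static void installToolCtor(Module &M) {
      IRBuilder<> IRB(M.getContext());
      Function *Ctor, *InitFn;
      std::tie(Ctor, InitFn) = createSanitizerCtorAndInitFunctions(
          M, /*CtorName=*/"tool.module_ctor", /*InitName=*/"__tool_init",
          /*InitArgTypes=*/{IRB.getInt8PtrTy()},
          /*InitArgs=*/{Constant::getNullValue(IRB.getInt8PtrTy())});
      (void)InitFn; // already called from inside Ctor
      // Run the constructor at module load time.
      appendToGlobalCtors(M, Ctor, /*Priority=*/0);
    }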
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
new file mode 100644
index 000000000000..8877aeafecde
--- /dev/null
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -0,0 +1,782 @@
+//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the PredicateInfo class.
+//
+//===----------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <algorithm>
+#define DEBUG_TYPE "predicateinfo"
+using namespace llvm;
+using namespace PatternMatch;
+using namespace llvm::PredicateInfoClasses;
+
+INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+static cl::opt<bool> VerifyPredicateInfo(
+ "verify-predicateinfo", cl::init(false), cl::Hidden,
+ cl::desc("Verify PredicateInfo in legacy printer pass."));
+namespace {
+DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
+ "Controls which variables are renamed with predicateinfo")
+// Given a predicate info that is a type of branching terminator, get the
+// branching block.
+const BasicBlock *getBranchBlock(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Only branches and switches should have PHIOnly defs that "
+ "require branch blocks.");
+ return cast<PredicateWithEdge>(PB)->From;
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// branching terminator.
+static Instruction *getBranchTerminator(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get a terminator from.");
+ return cast<PredicateWithEdge>(PB)->From->getTerminator();
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// edge this predicate info represents
+const std::pair<BasicBlock *, BasicBlock *>
+getBlockEdge(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get an edge from.");
+ const auto *PEdge = cast<PredicateWithEdge>(PB);
+ return std::make_pair(PEdge->From, PEdge->To);
+}
+}
+
+namespace llvm {
+namespace PredicateInfoClasses {
+enum LocalNum {
+ // Operations that must appear first in the block.
+ LN_First,
+ // Operations that are somewhere in the middle of the block, and are sorted on
+ // demand.
+ LN_Middle,
+ // Operations that must appear last in a block, like successor phi node uses.
+ LN_Last
+};
+
+// Associate global and local DFS info with defs and uses, so we can sort them
+// into a global domination ordering.
+struct ValueDFS {
+ int DFSIn = 0;
+ int DFSOut = 0;
+ unsigned int LocalNum = LN_Middle;
+ // Only one of Def or Use will be set.
+ Value *Def = nullptr;
+ Use *U = nullptr;
+ // Neither PInfo nor EdgeOnly participate in the ordering
+ PredicateBase *PInfo = nullptr;
+ bool EdgeOnly = false;
+};
+
+// This compares ValueDFS structures, creating OrderedBasicBlocks where
+// necessary to compare uses/defs in the same block. Doing so allows us to walk
+// the minimum number of instructions necessary to compute our def/use ordering.
+struct ValueDFS_Compare {
+ DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap;
+ ValueDFS_Compare(
+ DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap)
+ : OBBMap(OBBMap) {}
+ bool operator()(const ValueDFS &A, const ValueDFS &B) const {
+ if (&A == &B)
+ return false;
+    // The only case we can't directly compare them is when they are in the same
+ // block, and both have localnum == middle. In that case, we have to use
+ // comesbefore to see what the real ordering is, because they are in the
+ // same basic block.
+
+ bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+
+    // We want to put the def that will get used for a given set of phi uses
+    // before those phi uses.
+    // So we sort by edge, then by def.
+    // Note that only phi node uses and defs can come last.
+ if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+ return comparePHIRelated(A, B);
+
+ if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
+ return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
+ std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+ return localComesBefore(A, B);
+ }
+
+ // For a phi use, or a non-materialized def, return the edge it represents.
+ const std::pair<BasicBlock *, BasicBlock *>
+ getBlockEdge(const ValueDFS &VD) const {
+ if (!VD.Def && VD.U) {
+ auto *PHI = cast<PHINode>(VD.U->getUser());
+ return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
+ }
+ // This is really a non-materialized def.
+ return ::getBlockEdge(VD.PInfo);
+ }
+
+ // For two phi related values, return the ordering.
+ bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+ auto &ABlockEdge = getBlockEdge(A);
+ auto &BBlockEdge = getBlockEdge(B);
+ // Now sort by block edge and then defs before uses.
+ return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+ }
+
+ // Get the definition of an instruction that occurs in the middle of a block.
+ Value *getMiddleDef(const ValueDFS &VD) const {
+ if (VD.Def)
+ return VD.Def;
+    // It's possible for the defs and uses to be null. For branches, the local
+    // numbering will say the placed predicateinfos should go first (i.e.,
+    // LN_First), so we won't be in this function. For assumes, we will end
+    // up here, because we need to order the def we will place relative to the
+    // assume. So for the purpose of ordering, we pretend the def is the assume
+ // because that is where we will insert the info.
+ if (!VD.U) {
+ assert(VD.PInfo &&
+ "No def, no use, and no predicateinfo should not occur");
+ assert(isa<PredicateAssume>(VD.PInfo) &&
+ "Middle of block should only occur for assumes");
+ return cast<PredicateAssume>(VD.PInfo)->AssumeInst;
+ }
+ return nullptr;
+ }
+
+ // Return either the Def, if it's not null, or the user of the Use, if the def
+ // is null.
+ const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
+ if (Def)
+ return cast<Instruction>(Def);
+ return cast<Instruction>(U->getUser());
+ }
+
+ // This performs the necessary local basic block ordering checks to tell
+ // whether A comes before B, where both are in the same basic block.
+ bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
+ auto *ADef = getMiddleDef(A);
+ auto *BDef = getMiddleDef(B);
+
+ // See if we have real values or uses. If we have real values, we are
+ // guaranteed they are instructions or arguments. No matter what, we are
+ // guaranteed they are in the same block if they are instructions.
+ auto *ArgA = dyn_cast_or_null<Argument>(ADef);
+ auto *ArgB = dyn_cast_or_null<Argument>(BDef);
+
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+
+ auto *AInst = getDefOrUser(ADef, A.U);
+ auto *BInst = getDefOrUser(BDef, B.U);
+
+ auto *BB = AInst->getParent();
+ auto LookupResult = OBBMap.find(BB);
+ if (LookupResult != OBBMap.end())
+ return LookupResult->second->dominates(AInst, BInst);
+
+ auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
+ return Result.first->second->dominates(AInst, BInst);
+ }
+};
+
+} // namespace PredicateInfoClasses
+
+bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
+ const ValueDFS &VDUse) const {
+ if (Stack.empty())
+ return false;
+  // If it's a phi-only use, make sure it's for this phi node edge, and that the
+ // use is in a phi node. If it's anything else, and the top of the stack is
+ // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to
+ // the defs they must go with so that we can know it's time to pop the stack
+ // when we hit the end of the phi uses for a given def.
+ if (Stack.back().EdgeOnly) {
+ if (!VDUse.U)
+ return false;
+ auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
+ if (!PHI)
+ return false;
+ // Check edge
+ BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U);
+ if (EdgePred != getBranchBlock(Stack.back().PInfo))
+ return false;
+
+ // Use dominates, which knows how to handle edge dominance.
+ return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U);
+ }
+
+ return (VDUse.DFSIn >= Stack.back().DFSIn &&
+ VDUse.DFSOut <= Stack.back().DFSOut);
+}
+
+void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
+ const ValueDFS &VD) {
+ while (!Stack.empty() && !stackIsInScope(Stack, VD))
+ Stack.pop_back();
+}
+
+// Convert the uses of Op into a vector of uses, associating global and local
+// DFS info with each one.
+void PredicateInfo::convertUsesToDFSOrdered(
+ Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+ for (auto &U : Op->uses()) {
+ if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+ ValueDFS VD;
+ // Put the phi node uses in the incoming block.
+ BasicBlock *IBlock;
+ if (auto *PN = dyn_cast<PHINode>(I)) {
+ IBlock = PN->getIncomingBlock(U);
+ // Make phi node users appear last in the incoming block
+ // they are from.
+ VD.LocalNum = LN_Last;
+ } else {
+ // If it's not a phi node use, it is somewhere in the middle of the
+ // block.
+ IBlock = I->getParent();
+ VD.LocalNum = LN_Middle;
+ }
+ DomTreeNode *DomNode = DT.getNode(IBlock);
+ // It's possible our use is in an unreachable block. Skip it if so.
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.U = &U;
+ DFSOrderedSet.push_back(VD);
+ }
+ }
+}
+
+// Collect relevant operations from Comparison that we may want to insert copies
+// for.
+void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
+ auto *Op0 = Comparison->getOperand(0);
+ auto *Op1 = Comparison->getOperand(1);
+ if (Op0 == Op1)
+ return;
+ CmpOperands.push_back(Comparison);
+ // Only want real values, not constants. Additionally, operands with one use
+ // are only being used in the comparison, which means they will not be useful
+ // for us to consider for predicateinfo.
+ //
+ if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
+ CmpOperands.push_back(Op0);
+ if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
+ CmpOperands.push_back(Op1);
+}
+
+// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
+void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+ PredicateBase *PB) {
+ OpsToRename.insert(Op);
+ auto &OperandInfo = getOrCreateValueInfo(Op);
+ AllInfos.push_back(PB);
+ OperandInfo.Infos.push_back(PB);
+}
+
+// Process an assume instruction and place relevant operations we want to rename
+// into OpsToRename.
+void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ // See if we have a comparison we support
+ SmallVector<Value *, 8> CmpOperands;
+ SmallVector<Value *, 2> ConditionsToProcess;
+ CmpInst::Predicate Pred;
+ Value *Operand = II->getOperand(0);
+ if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))
+ .match(II->getOperand(0))) {
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
+ ConditionsToProcess.push_back(Operand);
+ } else if (isa<CmpInst>(Operand)) {
+
+ ConditionsToProcess.push_back(Operand);
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands) {
+ auto *PA = new PredicateAssume(Op, II, Cmp);
+ addInfoFor(OpsToRename, Op, PA);
+ }
+ CmpOperands.clear();
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // Otherwise, it should be an AND.
+ assert(BinOp->getOpcode() == Instruction::And &&
+ "Should have been an AND");
+ auto *PA = new PredicateAssume(BinOp, II, BinOp);
+ addInfoFor(OpsToRename, BinOp, PA);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ }
+}
+
+// Process a block terminating branch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ BasicBlock *FirstBB = BI->getSuccessor(0);
+ BasicBlock *SecondBB = BI->getSuccessor(1);
+ SmallVector<BasicBlock *, 2> SuccsToProcess;
+ SuccsToProcess.push_back(FirstBB);
+ SuccsToProcess.push_back(SecondBB);
+ SmallVector<Value *, 2> ConditionsToProcess;
+
+ auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
+ for (auto *Succ : SuccsToProcess) {
+ // Don't try to insert on a self-edge. This is mainly because we will
+      // eliminate it during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+ bool TakenEdge = (Succ == FirstBB);
+ // For and, only insert on the true edge
+ // For or, only insert on the false edge
+ if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
+ continue;
+ PredicateBase *PB =
+ new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, Op, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ };
+
+ // Match combinations of conditions.
+ CmpInst::Predicate Pred;
+ bool isAnd = false;
+ bool isOr = false;
+ SmallVector<Value *, 8> CmpOperands;
+ if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))) ||
+ match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value())))) {
+ auto *BinOp = cast<BinaryOperator>(BI->getCondition());
+ if (BinOp->getOpcode() == Instruction::And)
+ isAnd = true;
+ else if (BinOp->getOpcode() == Instruction::Or)
+ isOr = true;
+ ConditionsToProcess.push_back(BinOp->getOperand(0));
+ ConditionsToProcess.push_back(BinOp->getOperand(1));
+ ConditionsToProcess.push_back(BI->getCondition());
+ } else if (isa<CmpInst>(BI->getCondition())) {
+ ConditionsToProcess.push_back(BI->getCondition());
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands)
+ InsertHelper(Op, isAnd, isOr, Cmp);
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // This must be an AND or an OR.
+ assert((BinOp->getOpcode() == Instruction::And ||
+ BinOp->getOpcode() == Instruction::Or) &&
+ "Should have been an AND or an OR");
+ // The actual value of the binop is not subject to the same restrictions
+ // as the comparison. It's either true or false on the true/false branch.
+ InsertHelper(BinOp, false, false, BinOp);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ CmpOperands.clear();
+ }
+}
+// Process a block terminating switch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ Value *Op = SI->getCondition();
+ if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
+ return;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = SI->getSuccessor(i);
+ ++SwitchEdges[TargetBlock];
+ }
+
+ // Now propagate info for each case value
+ for (auto C : SI->cases()) {
+ BasicBlock *TargetBlock = C.getCaseSuccessor();
+ if (SwitchEdges.lookup(TargetBlock) == 1) {
+ PredicateSwitch *PS = new PredicateSwitch(
+ Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI);
+ addInfoFor(OpsToRename, Op, PS);
+ if (!TargetBlock->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, TargetBlock});
+ }
+ }
+}
+
+// Build predicate info for our function
+void PredicateInfo::buildPredicateInfo() {
+ DT.updateDFSNumbers();
+ // Collect operands to rename from all conditional branch terminators, as well
+ // as assume statements.
+ SmallPtrSet<Value *, 8> OpsToRename;
+ for (auto DTN : depth_first(DT.getRootNode())) {
+ BasicBlock *BranchBB = DTN->getBlock();
+ if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
+ if (!BI->isConditional())
+ continue;
+ processBranch(BI, BranchBB, OpsToRename);
+ } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
+ processSwitch(SI, BranchBB, OpsToRename);
+ }
+ }
+ for (auto &Assume : AC.assumptions()) {
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
+ processAssume(II, II->getParent(), OpsToRename);
+ }
+ // Now rename all our operations.
+ renameUses(OpsToRename);
+}
+
+// Given the renaming stack, make all the operands currently on the stack real
+// by inserting them into the IR. Return the last operation's value.
+Value *PredicateInfo::materializeStack(unsigned int &Counter,
+ ValueDFSStack &RenameStack,
+ Value *OrigOp) {
+ // Find the first thing we have to materialize
+ auto RevIter = RenameStack.rbegin();
+ for (; RevIter != RenameStack.rend(); ++RevIter)
+ if (RevIter->Def)
+ break;
+
+ size_t Start = RevIter - RenameStack.rbegin();
+  // The maximum number of things we should be trying to materialize at once
+  // right now is 4, depending on whether we had an assume and a branch that
+  // both used an 'and' of conditions.
+ for (auto RenameIter = RenameStack.end() - Start;
+ RenameIter != RenameStack.end(); ++RenameIter) {
+ auto *Op =
+ RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
+ ValueDFS &Result = *RenameIter;
+ auto *ValInfo = Result.PInfo;
+ // For edge predicates, we can just place the operand in the block before
+ // the terminator. For assume, we have to place it right before the assume
+ // to ensure we dominate all of our uses. Always insert right before the
+ // relevant instruction (terminator, assume), so that we insert in proper
+ // order in the case of multiple predicateinfo in the same block.
+ if (isa<PredicateWithEdge>(ValInfo)) {
+ IRBuilder<> B(getBranchTerminator(ValInfo));
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC =
+ B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ } else {
+ auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
+ assert(PAssume &&
+ "Should not have gotten here without it being an assume");
+ IRBuilder<> B(PAssume->AssumeInst);
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC = B.CreateCall(IF, Op);
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ }
+ }
+ return RenameStack.back().Def;
+}
+
+// Instead of the standard SSA renaming algorithm, which is O(Number of
+// instructions), and walks the entire dominator tree, we walk only the defs +
+// uses. The standard SSA renaming algorithm does not really rely on the
+// dominator tree except to order the stack push/pops of the renaming stacks, so
+// that defs end up getting pushed before hitting the correct uses. This does
+// not require the dominator tree, only the *order* of the dominator tree. The
+// complete and correct ordering of the defs and uses in the dominator tree is
+// contained in the DFS numbering of the dominator tree. So we sort the defs and
+// uses into the DFS ordering, and then just use the renaming stack as per
+// normal, pushing when we hit a def (which is a predicateinfo instruction),
+// popping when we are out of the dfs scope for that def, and replacing any uses
+// with top of stack if it exists. In order to handle liveness without
+// propagating liveness info, we don't actually insert the predicateinfo
+// instruction def until we see a use that it would dominate. Once we see such
+// a use, we materialize the predicateinfo instruction in the right place and
+// use it.
+//
+// TODO: Use this algorithm to perform fast single-variable renaming in
+// promotememtoreg and memoryssa.
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(OBBMap);
+ // Compute liveness, and rename in O(uses) per Op.
+ for (auto *Op : OpsToRename) {
+ unsigned Counter = 0;
+ SmallVector<ValueDFS, 16> OrderedUses;
+ const auto &ValueInfo = getValueInfo(Op);
+ // Insert the possible copies into the def/use list.
+    // They will become real copies if we find a real use for them, and are
+    // never created otherwise.
+ for (auto &PossibleCopy : ValueInfo.Infos) {
+ ValueDFS VD;
+ // Determine where we are going to place the copy by the copy type.
+      // The predicate info for branches always comes first; it will get
+      // materialized in the split block at the top of the block.
+      // The predicate info for assumes will be somewhere in the middle;
+      // it will get materialized in front of the assume.
+ if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
+ VD.LocalNum = LN_Middle;
+ DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ } else if (isa<PredicateWithEdge>(PossibleCopy)) {
+ // If we can only do phi uses, we treat it like it's in the branch
+ // block, and handle it specially. We know that it goes last, and only
+        // dominates phi uses.
+ auto BlockEdge = getBlockEdge(PossibleCopy);
+ if (EdgeUsesOnly.count(BlockEdge)) {
+ VD.LocalNum = LN_Last;
+ auto *DomNode = DT.getNode(BlockEdge.first);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ VD.EdgeOnly = true;
+ OrderedUses.push_back(VD);
+ }
+ } else {
+ // Otherwise, we are in the split block (even though we perform
+ // insertion in the branch block).
+ // Insert a possible copy at the split block and before the branch.
+ VD.LocalNum = LN_First;
+ auto *DomNode = DT.getNode(BlockEdge.second);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ }
+ }
+ }
+ }
+
+ convertUsesToDFSOrdered(Op, OrderedUses);
+ std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ SmallVector<ValueDFS, 8> RenameStack;
+    // For each use, sorted into dfs order, push values and replace uses with
+    // the top of stack, which will represent the reaching def.
+ for (auto &VD : OrderedUses) {
+ // We currently do not materialize copy over copy, but we should decide if
+ // we want to.
+ bool PossibleCopy = VD.PInfo != nullptr;
+ if (RenameStack.empty()) {
+ DEBUG(dbgs() << "Rename Stack is empty\n");
+ } else {
+ DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
+ << RenameStack.back().DFSIn << ","
+ << RenameStack.back().DFSOut << ")\n");
+ }
+
+ DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
+ << VD.DFSOut << ")\n");
+
+ bool ShouldPush = (VD.Def || PossibleCopy);
+ bool OutOfScope = !stackIsInScope(RenameStack, VD);
+ if (OutOfScope || ShouldPush) {
+ // Sync to our current scope.
+ popStackUntilDFSScope(RenameStack, VD);
+ if (ShouldPush) {
+ RenameStack.push_back(VD);
+ }
+ }
+      // If we get to this point and the stack is empty, we must have a use
+      // with no renaming needed; just skip it.
+ if (RenameStack.empty())
+ continue;
+ // Skip values, only want to rename the uses
+ if (VD.Def || PossibleCopy)
+ continue;
+ if (!DebugCounter::shouldExecute(RenameCounter)) {
+ DEBUG(dbgs() << "Skipping execution due to debug counter\n");
+ continue;
+ }
+ ValueDFS &Result = RenameStack.back();
+
+ // If the possible copy dominates something, materialize our stack up to
+ // this point. This ensures every comparison that affects our operation
+ // ends up with predicateinfo.
+ if (!Result.Def)
+ Result.Def = materializeStack(Counter, RenameStack, Op);
+
+ DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
+ << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n");
+ assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
+ "Predicateinfo def should have dominated this use");
+ VD.U->set(Result.Def);
+ }
+ }
+}
+
+PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
+ auto OIN = ValueInfoNums.find(Operand);
+ if (OIN == ValueInfoNums.end()) {
+ // This will grow it
+ ValueInfos.resize(ValueInfos.size() + 1);
+ // This will use the new size and give us a 0 based number of the info
+ auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1});
+ assert(InsertResult.second && "Value info number already existed?");
+ return ValueInfos[InsertResult.first->second];
+ }
+ return ValueInfos[OIN->second];
+}
+
+const PredicateInfo::ValueInfo &
+PredicateInfo::getValueInfo(Value *Operand) const {
+ auto OINI = ValueInfoNums.lookup(Operand);
+ assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
+ assert(OINI < ValueInfos.size() &&
+ "Value Info Number greater than size of Value Info Table");
+ return ValueInfos[OINI];
+}
+
+PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F), DT(DT), AC(AC) {
+ // Push an empty operand info so that we can detect 0 as not finding one
+ ValueInfos.resize(1);
+ buildPredicateInfo();
+}
+
+PredicateInfo::~PredicateInfo() {}
+
+void PredicateInfo::verifyPredicateInfo() const {}
+
+char PredicateInfoPrinterLegacyPass::ID = 0;
+
+PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
+ : FunctionPass(ID) {
+ initializePredicateInfoPrinterLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+}
+
+bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(dbgs());
+ if (VerifyPredicateInfo)
+ PredInfo->verifyPredicateInfo();
+ return false;
+}
+
+PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ OS << "PredicateInfo for function: " << F.getName() << "\n";
+ make_unique<PredicateInfo>(F, DT, AC)->print(OS);
+
+ return PreservedAnalyses::all();
+}
+
+/// \brief An assembly annotator class to print PredicateInfo information in
+/// comments.
+class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class PredicateInfo;
+ const PredicateInfo *PredInfo;
+
+public:
+ PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {}
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
+ OS << "; Has predicate info\n";
+ if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
+ OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
+ << " Comparison:" << *PB->Condition << " Edge: [";
+ PB->From->printAsOperand(OS);
+ OS << ",";
+ PB->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
+ OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
+ << " Switch:" << *PS->Switch << " Edge: [";
+ PS->From->printAsOperand(OS);
+ OS << ",";
+ PS->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
+ OS << "; assume predicate info {"
+ << " Comparison:" << *PA->Condition << " }\n";
+ }
+ }
+ }
+};
+
+void PredicateInfo::print(raw_ostream &OS) const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
+
+void PredicateInfo::dump() const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(dbgs(), &Writer);
+}
+
+PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+
+ return PreservedAnalyses::all();
+}
+}
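A consumer of the new utility queries getPredicateInfoFor() on the llvm.ssa.copy calls it inserts. The sketch below is a hypothetical helper, using only the fields the annotator above prints, and shows how a pass might report what is known at such a copy.

    // Sketch: report what PredicateInfo knows at an inserted ssa.copy.
    #include "llvm/Support/Casting.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Utils/PredicateInfo.h"
    using namespace llvm;

    static void describePredicate(const PredicateInfo &PI, const Instruction *I,
                                  raw_ostream &OS) {
      const PredicateBase *PB = PI.getPredicateInfoFor(I);
      if (!PB)
        return; // not a copy inserted by PredicateInfo
      if (const auto *PBr = dyn_cast<PredicateBranch>(PB))
        OS << "comparison " << *PBr->Condition << " is "
           << (PBr->TrueEdge ? "true" : "false") << " on this edge\n";
      else if (const auto *PS = dyn_cast<PredicateSwitch>(PB))
        OS << "switch condition equals " << *PS->CaseValue << " on this edge\n";
      else if (const auto *PA = dyn_cast<PredicateAssume>(PB))
        OS << "assumed: " << *PA->Condition << "\n";
    }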
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 35faa6f65efd..a33b85c4ee69 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
using namespace llvm;
@@ -225,9 +226,6 @@ struct PromoteMem2Reg {
DominatorTree &DT;
DIBuilder DIB;
- /// An AliasSetTracker object to update. If null, don't update it.
- AliasSetTracker *AST;
-
/// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
AssumptionCache *AC;
@@ -269,10 +267,10 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionCache *AC)
+ AssumptionCache *AC)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
- AST(AST), AC(AC) {}
+ AC(AC) {}
void run();
@@ -301,6 +299,18 @@ private:
} // end of anonymous namespace
+/// Given a LoadInst LI, this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+ Function *AssumeIntrinsic =
+ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+ Constant::getNullValue(LI->getType()));
+ LoadNotNull->insertAfter(LI);
+ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+ CI->insertAfter(LoadNotNull);
+ AC->registerAssumption(CI);
+}
+
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
@@ -334,9 +344,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI,
- DominatorTree &DT,
- AliasSetTracker *AST) {
+ LargeBlockInfo &LBI, DominatorTree &DT,
+ AssumptionCache *AC) {
StoreInst *OnlyStore = Info.OnlyStore;
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,9 +396,15 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// code.
if (ReplVal == LI)
ReplVal = UndefValue::get(LI->getType());
+
+ // If the load was marked as nonnull we don't want to lose
+ // that information when we erase this Load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
LI->replaceAllUsesWith(ReplVal);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
@@ -410,8 +425,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
Info.OnlyStore->eraseFromParent();
LBI.deleteValue(Info.OnlyStore);
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
return true;
@@ -435,7 +448,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
- AliasSetTracker *AST) {
+ DominatorTree &DT,
+ AssumptionCache *AC) {
// The trickiest case to handle is when we have large blocks. Because of this,
// this code is optimized assuming that large blocks happen. This does not
// significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,13 +490,18 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// There is no store before this load, bail out (load may be affected
// by the following stores - see main comment).
return false;
- }
- else
+ } else {
// Otherwise, there was a store before this load, the load takes its value.
- LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+ // Note, if the load was marked as nonnull we don't want to lose that
+ // information when we erase it. So we preserve it with an assume.
+ Value *ReplVal = std::prev(I)->second->getOperand(0);
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
+ LI->replaceAllUsesWith(ReplVal);
+ }
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
@@ -499,8 +518,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(SI);
}
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
@@ -517,8 +534,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- if (AST)
- PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
AllocaInfo Info;
@@ -536,8 +551,6 @@ void PromoteMem2Reg::run() {
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
// Remove the alloca from the Allocas list, since it has been processed
@@ -553,7 +566,7 @@ void PromoteMem2Reg::run() {
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
++NumSingleStore;
@@ -564,7 +577,7 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock &&
- promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+ promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -578,11 +591,6 @@ void PromoteMem2Reg::run() {
BBNumbers[&BB] = ID++;
}
- // If we have an AST to keep updated, remember some pointer value that is
- // stored into the alloca.
- if (AST)
- PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-
// Remember the dbg.declare intrinsic describing this alloca, if any.
if (Info.DbgDeclare)
AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
@@ -662,8 +670,6 @@ void PromoteMem2Reg::run() {
// tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
- if (AST)
- AST->deleteValue(A);
A->eraseFromParent();
}
@@ -694,8 +700,6 @@ void PromoteMem2Reg::run() {
// If this PHI node merges one value and/or undefs, get the value.
if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
- if (AST && PN->getType()->isPointerTy())
- AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
NewPhiNodes.erase(I++);
@@ -863,10 +867,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
&BB->front());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
-
- if (AST && PN->getType()->isPointerTy())
- AST->copyValue(PointerAllocaValues[AllocaNo], PN);
-
return true;
}
@@ -940,10 +940,15 @@ NextIteration:
Value *V = IncomingVals[AI->second];
+ // If the load was marked as nonnull we don't want to lose
+ // that information when we erase this Load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(V, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
// Anything using the load now uses the current value.
LI->replaceAllUsesWith(V);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
BB->getInstList().erase(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Delete this instruction and mark the name as the current holder of the
@@ -987,10 +992,10 @@ NextIteration:
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionCache *AC) {
+ AssumptionCache *AC) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, AST, AC).run();
+ PromoteMem2Reg(Allocas, DT, AC).run();
}
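Callers of PromoteMemToReg() drop the AliasSetTracker argument with this change. A sketch of an updated caller, modeled loosely on the Mem2Reg pass (the names here are illustrative), follows.

    // Sketch: promote the promotable entry-block allocas of a function.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>
    using namespace llvm;

    static bool promoteAllocas(Function &F, DominatorTree &DT,
                               AssumptionCache &AC) {
      std::vector<AllocaInst *> Allocas;
      for (Instruction &I : F.getEntryBlock())
        if (auto *AI = dyn_cast<AllocaInst>(&I))
          if (isAllocaPromotable(AI))
            Allocas.push_back(AI);
      if (Allocas.empty())
        return false;
      // No AliasSetTracker parameter any more; passing the AssumptionCache lets
      // the utility preserve !nonnull load metadata via llvm.assume.
      PromoteMemToReg(Allocas, DT, &AC);
      return true;
    }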
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8e93ee757a15..8b6a2c3766d2 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,20 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <cassert>
+#include <utility>
using namespace llvm;
@@ -36,7 +45,7 @@ static AvailableValsTy &getAvailableVals(void *AV) {
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {}
+ : InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
@@ -205,6 +214,7 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
}
namespace llvm {
+
template<>
class SSAUpdaterTraits<SSAUpdater> {
public:
@@ -230,6 +240,7 @@ public:
PHI_iterator &operator++() { ++idx; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
};
@@ -303,7 +314,7 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
/// Check to see if AvailableVals has an entry for the specified BB and if so,
/// return it. If not, construct SSA form by first calculating the required
@@ -337,14 +348,12 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
SSA.Initialize(SomeVal->getType(), BaseName);
}
-
void LoadAndStorePromoter::
run(const SmallVectorImpl<Instruction*> &Insts) const {
-
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
- DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock;
for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
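As the comment above notes, SSAUpdater is purely for cross-block references. A minimal usage sketch follows; it is a hypothetical helper that assumes the caller has already collected the per-block reaching definitions and the uses to rewrite.

    // Sketch: rewrite cross-block uses of a value with SSAUpdater.
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Type.h"
    #include "llvm/IR/Use.h"
    #include "llvm/IR/Value.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include <utility>
    #include <vector>
    using namespace llvm;

    static void rewriteCrossBlockUses(
        Type *Ty, const std::vector<std::pair<BasicBlock *, Value *>> &Defs,
        const std::vector<Use *> &Uses) {
      SSAUpdater SSA;
      SSA.Initialize(Ty, "promoted");
      // Record the value that is live out of each defining block.
      for (const auto &Def : Defs)
        SSA.AddAvailableValue(Def.first, Def.second);
      // Rewrite each cross-block use; PHIs are created at join points on demand.
      for (Use *U : Uses)
        SSA.RewriteUse(*U);
    }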
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 7b0bddbbb831..127a44df5344 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -169,6 +170,8 @@ class SimplifyCFGOpt {
unsigned BonusInstThreshold;
AssumptionCache *AC;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
+ // See comments in SimplifyCFGOpt::SimplifySwitch.
+ bool LateSimplifyCFG;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(
TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
@@ -192,9 +195,10 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
unsigned BonusInstThreshold, AssumptionCache *AC,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG)
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
- LoopHeaders(LoopHeaders) {}
+ LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {}
bool run(BasicBlock *BB);
};
@@ -710,10 +714,9 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
- ++i)
- Cases.push_back(
- ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor()));
+ for (auto Case : SI->cases())
+ Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
+ Case.getCaseSuccessor()));
return SI->getDefaultDest();
}
@@ -846,12 +849,12 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
}
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
- if (DeadCases.count(i.getCaseValue())) {
+ if (DeadCases.count(i->getCaseValue())) {
if (HasWeight) {
- std::swap(Weights[i.getCaseIndex() + 1], Weights.back());
+ std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
- i.getCaseSuccessor()->removePredecessor(TI->getParent());
+ i->getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
@@ -996,8 +999,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallSetVector<BasicBlock*, 4> FailBlocks;
if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
for (auto *Succ : FailBlocks) {
- std::vector<BasicBlock*> Blocks = { TI->getParent() };
- if (!SplitBlockPredecessors(Succ, Blocks, ".fold.split"))
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
return false;
}
}
@@ -1280,7 +1282,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (!isa<CallInst>(I1))
I1->setDebugLoc(
DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()));
-
+
I2->eraseFromParent();
Changed = true;
@@ -1472,29 +1474,28 @@ static bool canSinkInstructions(
return false;
}
+ // Because SROA can't handle speculating stores of selects, try not
+ // to sink loads or stores of allocas when we'd have to create a PHI for
+ // the address operand. Also, because it is likely that loads or stores
+ // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // This can cause code churn which can have unintended consequences down
+ // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
+ // FIXME: This is a workaround for a deficiency in SROA - see
+ // https://llvm.org/bugs/show_bug.cgi?id=30188
+ if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1));
+ }))
+ return false;
+ if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(0));
+ }))
+ return false;
+
for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
if (I0->getOperand(OI)->getType()->isTokenTy())
// Don't touch any operand of token type.
return false;
- // Because SROA can't handle speculating stores of selects, try not
- // to sink loads or stores of allocas when we'd have to create a PHI for
- // the address operand. Also, because it is likely that loads or stores
- // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
- // This can cause code churn which can have unintended consequences down
- // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
- // FIXME: This is a workaround for a deficiency in SROA - see
- // https://llvm.org/bugs/show_bug.cgi?id=30188
- if (OI == 1 && isa<StoreInst>(I0) &&
- any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(1));
- }))
- return false;
- if (OI == 0 && isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(0));
- }))
- return false;
-
auto SameAsI0 = [&I0, OI](const Instruction *I) {
assert(I->getNumOperands() == I0->getNumOperands());
return I->getOperand(OI) == I0->getOperand(OI);
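A source-level picture of the pattern the relocated guard refuses to sink (a hypothetical example, not taken from the patch): both stores below target stack slots, so sinking them into the join block would force a PHI over the two addresses, which SROA cannot then split apart.

#include <cstdio>

int f(bool c) {
  int a = 0, b = 0;          // two allocas before mem2reg/SROA runs
  if (c)
    a = 1;                   // store to &a in the then-block
  else
    b = 2;                   // store to &b in the else-block
  // Sinking the two stores to this point would need
  //   store phi(1, 2), phi(&a, &b)
  // i.e. a PHI of alloca addresses, which defeats SROA.
  return a + b;
}

int main() { std::printf("%d\n", f(true)); }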
@@ -1546,7 +1547,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}))
return false;
}
-
+
// We don't need to do any more checking here; canSinkLastInstruction should
// have done it all for us.
SmallVector<Value*, 4> NewOperands;
@@ -1653,7 +1654,7 @@ namespace {
bool isValid() const {
return !Fail;
}
-
+
void operator -- () {
if (Fail)
return;
@@ -1699,7 +1700,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// / \
// [f(1)] [if]
// | | \
- // | | \
+ // | | |
// | [f(2)]|
// \ | /
// [ end ]
@@ -1737,7 +1738,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (UnconditionalPreds.size() < 2)
return false;
-
+
bool Changed = false;
// We take a two-step approach to tail sinking. First we scan from the end of
// each block upwards in lockstep. If the n'th instruction from the end of each
@@ -1767,7 +1768,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
NumPHIInsts++;
-
+
return NumPHIInsts <= 1;
};
@@ -1790,7 +1791,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (!Profitable)
return false;
-
+
DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
@@ -1800,7 +1801,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return false;
Changed = true;
}
-
+
// Now that we've analyzed all potential sinking candidates, perform the
// actual sink. We iteratively sink the last non-terminator of the source
// blocks into their common successor unless doing so would require too
@@ -1826,7 +1827,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
break;
}
-
+
if (!sinkLastInstruction(UnconditionalPreds))
return Changed;
NumSinkCommons++;
@@ -2078,6 +2079,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
Value *S = Builder.CreateSelect(
BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
SpeculatedStore->setOperand(0, S);
+ SpeculatedStore->setDebugLoc(
+ DILocation::getMergedLocation(
+ BI->getDebugLoc(), SpeculatedStore->getDebugLoc()));
}
// Metadata can be dependent on the condition we are hoisting above.
@@ -2147,7 +2151,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
+ AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2239,6 +2244,11 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
// Insert the new instruction into its new home.
if (N)
EdgeBB->getInstList().insert(InsertPt, N);
+
+ // Register the new instruction with the assumption cache if necessary.
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
}
// Loop over all of the edges from PredBB to BB, changing them to branch
@@ -2251,7 +2261,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL) | true;
+ return FoldCondBranchOnPHI(BI, DL, AC) | true;
}
return false;
@@ -3433,8 +3443,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// Find the relevant condition and destinations.
Value *Condition = Select->getCondition();
- BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
- BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
// Get weight for TrueBB and FalseBB.
uint32_t TrueWeight = 0, FalseWeight = 0;
@@ -3444,9 +3454,9 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
TrueWeight =
- (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()];
+ (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
FalseWeight =
- (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()];
+ (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
}
}
@@ -4148,15 +4158,16 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
- ++i)
- if (i.getCaseSuccessor() == BB) {
- BB->removePredecessor(SI->getParent());
- SI->removeCase(i);
- --i;
- --e;
- Changed = true;
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ if (i->getCaseSuccessor() != BB) {
+ ++i;
+ continue;
}
+ BB->removePredecessor(SI->getParent());
+ i = SI->removeCase(i);
+ e = SI->case_end();
+ Changed = true;
+ }
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
removeUnwindEdge(TI->getParent());
@@ -4239,18 +4250,18 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
SmallVector<ConstantInt *, 16> CasesA;
SmallVector<ConstantInt *, 16> CasesB;
- for (SwitchInst::CaseIt I : SI->cases()) {
- BasicBlock *Dest = I.getCaseSuccessor();
+ for (auto Case : SI->cases()) {
+ BasicBlock *Dest = Case.getCaseSuccessor();
if (!DestA)
DestA = Dest;
if (Dest == DestA) {
- CasesA.push_back(I.getCaseValue());
+ CasesA.push_back(Case.getCaseValue());
continue;
}
if (!DestB)
DestB = Dest;
if (Dest == DestB) {
- CasesB.push_back(I.getCaseValue());
+ CasesB.push_back(Case.getCaseValue());
continue;
}
return false; // More than two destinations.
@@ -4375,7 +4386,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Bits - (KnownZero.Or(KnownOne)).countPopulation();
+ Bits - (KnownZero | KnownOne).countPopulation();
assert(NumUnknownBits <= Bits);
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
@@ -4400,17 +4411,17 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Remove dead cases from the switch.
for (ConstantInt *DeadCase : DeadCases) {
- SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase);
- assert(Case != SI->case_default() &&
+ SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
+ assert(CaseI != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
if (HasWeight) {
- std::swap(Weights[Case.getCaseIndex() + 1], Weights.back());
+ std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
// Prune unused values from PHI nodes.
- Case.getCaseSuccessor()->removePredecessor(SI->getParent());
- SI->removeCase(Case);
+ CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
+ SI->removeCase(CaseI);
}
if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
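For a concrete picture of the known-bits reasoning feeding DeadCases (an illustrative source-level example, not from the patch): once computeKnownBits proves the low bit of the switch condition is zero, every odd case value is unreachable and is pruned by the removal loop above.

#include <cstdio>

int classify(unsigned x) {
  switch (x & ~1u) {   // the low bit of the condition is known to be zero
  case 3:              // 3 has the low bit set, so this case is provably dead
    return 1;
  case 4:
    return 2;
  default:
    return 0;
  }
}

int main() { std::printf("%d\n", classify(5)); }  // prints 2, since 5 & ~1u == 4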
@@ -4464,10 +4475,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E;
- ++I) {
- ConstantInt *CaseValue = I.getCaseValue();
- BasicBlock *CaseDest = I.getCaseSuccessor();
+ for (auto Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseDest = Case.getCaseSuccessor();
int PhiIndex;
PHINode *PHI =
@@ -5202,8 +5212,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// common destination, as well as the min and max case values.
assert(SI->case_begin() != SI->case_end());
SwitchInst::CaseIt CI = SI->case_begin();
- ConstantInt *MinCaseVal = CI.getCaseValue();
- ConstantInt *MaxCaseVal = CI.getCaseValue();
+ ConstantInt *MinCaseVal = CI->getCaseValue();
+ ConstantInt *MaxCaseVal = CI->getCaseValue();
BasicBlock *CommonDest = nullptr;
typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy;
@@ -5213,7 +5223,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<PHINode *, 4> PHIs;
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
- ConstantInt *CaseVal = CI.getCaseValue();
+ ConstantInt *CaseVal = CI->getCaseValue();
if (CaseVal->getValue().slt(MinCaseVal->getValue()))
MinCaseVal = CaseVal;
if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
@@ -5222,7 +5232,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Resulting value at phi nodes for this case value.
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
ResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -5503,11 +5513,10 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Rot = Builder.CreateOr(LShr, Shl);
SI->replaceUsesOfWith(SI->getCondition(), Rot);
- for (SwitchInst::CaseIt C = SI->case_begin(), E = SI->case_end(); C != E;
- ++C) {
- auto *Orig = C.getCaseValue();
+ for (auto Case : SI->cases()) {
+ auto *Orig = Case.getCaseValue();
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- C.setValue(
+ Case.setValue(
cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
}
return true;
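The case-value remapping in the loop above, shown on concrete numbers. This is a sketch of just the arithmetic applied to each case value (it ignores the accompanying rotate of the condition), with Base and the shift amount assumed to come from the analysis earlier in ReduceSwitchRange:

#include <cassert>
#include <cstdint>

// New case value = (Orig - Base) >> Shift, mirroring the loop above.
static uint32_t remapCase(uint32_t Orig, uint32_t Base, uint32_t Shift) {
  return (Orig - Base) >> Shift;
}

int main() {
  // Cases 0x200, 0x300, 0x400 with Base = 0x200 and Shift = 8 become 0, 1, 2,
  // a dense range that later switch lowering handles far more cheaply.
  assert(remapCase(0x200, 0x200, 8) == 0);
  assert(remapCase(0x300, 0x200, 8) == 1);
  assert(remapCase(0x400, 0x200, 8) == 2);
}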
@@ -5553,7 +5562,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ // The conversion from switch to lookup tables results in difficult-to-analyze
+ // code and makes pruning branches much harder. This is a problem if the
+ // switch expression itself can still be restricted as a result of inlining or
+ // CVP. Therefore, only apply this transformation during late steps of the
+ // optimisation chain.
+ if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ReduceSwitchRange(SI, Builder, DL, TTI))
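The kind of switch the now-gated SwitchToLookupTable transform targets, shown at the source level (a hypothetical example; the point of the LateSimplifyCFG flag is to leave such switches intact until inlining and CVP have had a chance to constrain the condition):

#include <cstdio>

int daysIn(int month) {
  switch (month) {          // every case merely selects a constant ...
  case 0:  return 31;
  case 1:  return 28;
  case 2:  return 31;
  case 3:  return 30;
  default: return 0;
  }
  // ... so late SimplifyCFG can replace the control flow with a bounds check
  // plus a load from a constant table {31, 28, 31, 30}.
}

int main() { std::printf("%d\n", daysIn(1)); }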
@@ -5833,7 +5847,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL))
+ if (FoldCondBranchOnPHI(BI, DL, AC))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Scan predecessor blocks for conditional branches.
@@ -6012,8 +6026,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold, AssumptionCache *AC,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG) {
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
- BonusInstThreshold, AC, LoopHeaders)
+ BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG)
.run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6b1d3dc41330..a4cc6a031ad4 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -35,6 +35,9 @@ using namespace llvm;
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(
+ NumSimplifiedSDiv,
+ "Number of IV signed division operations converted to unsigned division");
STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
namespace {
@@ -75,6 +78,7 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+ bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
};
}
@@ -265,6 +269,33 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
Changed = true;
}
+bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
+ // Get the SCEVs for the SDiv operands.
+ auto *N = SE->getSCEV(SDiv->getOperand(0));
+ auto *D = SE->getSCEV(SDiv->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *L = LI->getLoopFor(SDiv->getParent());
+ N = SE->getSCEVAtScope(N, L);
+ D = SE->getSCEVAtScope(D, L);
+
+ // Replace sdiv by udiv if both of the operands are non-negative
+ if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
+ auto *UDiv = BinaryOperator::Create(
+ BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
+ SDiv->getName() + ".udiv", SDiv);
+ UDiv->setIsExact(SDiv->isExact());
+ SDiv->replaceAllUsesWith(UDiv);
+ DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ ++NumSimplifiedSDiv;
+ Changed = true;
+ DeadInsts.push_back(SDiv);
+ return true;
+ }
+
+ return false;
+}
+
/// SimplifyIVUsers helper for eliminating useless
/// remainder operations operating on an induction variable.
void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
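A source-level illustration of what eliminateSDiv buys (a hypothetical example, not from the patch): SCEV can prove both division operands non-negative here, so the signed division may be rewritten as an unsigned one, which is cheaper and easier for later analyses.

#include <cstdio>

long sumQuarters(const int *a, int n) {
  long sum = 0;
  for (int i = 0; i < n; ++i)     // SCEV knows 0 <= i < n, and 4 > 0 ...
    sum += a[i / 4];              // ... so "i / 4" may become an unsigned
                                  // division during indvar simplification.
  return sum;
}

int main() {
  int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  std::printf("%ld\n", sumQuarters(a, 8));
}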
@@ -426,12 +457,15 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
eliminateIVComparison(ICmp, IVOperand);
return true;
}
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSRem = Bin->getOpcode() == Instruction::SRem;
+ if (IsSRem || Bin->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Bin, IVOperand, IsSRem);
return true;
}
+
+ if (Bin->getOpcode() == Instruction::SDiv)
+ return eliminateSDiv(Bin);
}
if (auto *CI = dyn_cast<CallInst>(UseInst))
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 1220490123ce..f6070868de44 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -35,7 +36,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
static bool runImpl(Function &F, const DominatorTree *DT,
- const TargetLibraryInfo *TLI, AssumptionCache *AC) {
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE) {
const DataLayout &DL = F.getParent()->getDataLayout();
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -54,7 +56,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
// Don't waste time simplifying unused instructions.
if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -95,6 +97,7 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
/// runOnFunction - Remove instructions that simplify.
@@ -108,7 +111,10 @@ namespace {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- return runImpl(F, DT, TLI, AC);
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ return runImpl(F, DT, TLI, AC, ORE);
}
};
}
@@ -119,6 +125,7 @@ INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
@@ -133,9 +140,12 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- bool Changed = runImpl(F, &DT, &TLI, &AC);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
if (!Changed)
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8eaeb1073a76..aa71e3669ea2 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -51,9 +51,9 @@ static cl::opt<bool>
// Helper Functions
//===----------------------------------------------------------------------===//
-static bool ignoreCallingConv(LibFunc::Func Func) {
- return Func == LibFunc::abs || Func == LibFunc::labs ||
- Func == LibFunc::llabs || Func == LibFunc::strlen;
+static bool ignoreCallingConv(LibFunc Func) {
+ return Func == LibFunc_abs || Func == LibFunc_labs ||
+ Func == LibFunc_llabs || Func == LibFunc_strlen;
}
static bool isCallingConvCCompatible(CallInst *CI) {
@@ -123,8 +123,8 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
/// \brief Check whether the overloaded unary floating point function
/// corresponding to \a Ty is available.
static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
- LibFunc::Func LongDoubleFn) {
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
case Type::FloatTyID:
return TLI->has(FloatFn);
@@ -809,9 +809,9 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
// TODO: Does this belong in BuildLibCalls or should all of those similar
// functions be moved here?
-static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs,
+static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
IRBuilder<> &B, const TargetLibraryInfo &TLI) {
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func))
return nullptr;
@@ -819,7 +819,7 @@ static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs,
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
- PtrType, PtrType, nullptr);
+ PtrType, PtrType);
CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc");
if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
@@ -846,9 +846,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
// Is the inner call really malloc()?
Function *InnerCallee = Malloc->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
- Func != LibFunc::malloc)
+ Func != LibFunc_malloc)
return nullptr;
// The memset must cover the same number of bytes that are malloc'd.
@@ -948,6 +948,20 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
return B.CreateFPExt(V, B.getDoubleTy());
}
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
/// Shrink double -> float for binary functions like 'fmin/fmax'.
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
@@ -1041,9 +1055,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
- LibFunc::exp10l))
- return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
Callee->getAttributes());
}
@@ -1055,10 +1069,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
auto *OpC = dyn_cast<CallInst>(Op1);
if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *OpCCallee = OpC->getCalledFunction();
if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) {
+ TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
@@ -1075,17 +1089,20 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return ConstantFP::get(CI->getType(), 1.0);
if (Op2C->isExactlyValue(-0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
- LibFunc::sqrtl)) {
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
// If -ffast-math:
// pow(x, -0.5) -> 1.0 / sqrt(x)
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // Here we cannot lower to an intrinsic because C99 sqrt() and llvm.sqrt
- // are not guaranteed to have the same semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ // TODO: If the pow call is an intrinsic, we should lower to the sqrt
+ // intrinsic, so we match errno semantics. We also should check that the
+ // target can in fact lower the sqrt intrinsic -- we currently have no way
+ // to ask this question other than asking whether the target has a sqrt
+ // libcall, which is a sufficient but not necessary condition.
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
Callee->getAttributes());
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip");
@@ -1093,19 +1110,17 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
}
if (Op2C->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
- LibFunc::sqrtl) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
- LibFunc::fabsl)) {
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // Unlike other math intrinsics, sqrt has differerent semantics
- // from the libc function. See LangRef for details.
- return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
+ return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
Callee->getAttributes());
}
@@ -1115,9 +1130,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
- Value *FAbs =
- emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes());
+
+ Module *M = Callee->getParent();
+ Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
+ CI->getType());
+ Value *FAbs = B.CreateCall(FabsF, Sqrt);
+
Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
return Sel;
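The select on -inf in the hunk above exists because IEEE pow and sqrt disagree at exactly that point. A small stand-alone check of the semantics being preserved (plain C library calls, not LLVM code):

#include <cmath>
#include <cstdio>

int main() {
  double ninf = -INFINITY;
  // C99/IEEE: pow(-inf, 0.5) == +inf, but sqrt(-inf) is NaN. That is why the
  // transformed code keeps fabs(sqrt(x)) and explicitly selects +inf when the
  // input compares equal to -inf.
  std::printf("pow:  %f\n", std::pow(ninf, 0.5));   // inf
  std::printf("sqrt: %f\n", std::sqrt(ninf));       // nan
}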
@@ -1173,11 +1195,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc::Func LdExp = LibFunc::ldexpl;
+ LibFunc LdExp = LibFunc_ldexpl;
if (Op->getType()->isFloatTy())
- LdExp = LibFunc::ldexpf;
+ LdExp = LibFunc_ldexpf;
else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc::ldexp;
+ LdExp = LibFunc_ldexp;
if (TLI->has(LdExp)) {
Value *LdExpArg = nullptr;
@@ -1197,7 +1219,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Module *M = CI->getModule();
Value *NewCallee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
- Op->getType(), B.getInt32Ty(), nullptr);
+ Op->getType(), B.getInt32Ty());
CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
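The ldexp rewrite above relies on a simple identity: for a small integer n, exp2(n) and ldexp(1.0, n) both yield the same exactly representable power of two. A quick stand-alone check of that identity (C library calls, not the LLVM emission path):

#include <cassert>
#include <cmath>

int main() {
  // exp2 of a small integer is an exactly representable power of two,
  // so the two library calls agree bit-for-bit in this range.
  for (int n = -20; n <= 20; ++n)
    assert(std::exp2(static_cast<double>(n)) == std::ldexp(1.0, n));
  return 0;
}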
@@ -1208,15 +1230,6 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
-Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- if (Name == "fabs" && hasFloatVersion(Name))
- return optimizeUnaryDoubleFP(CI, B, false);
-
- return nullptr;
-}
-
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
@@ -1280,17 +1293,17 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
FMF.setUnsafeAlgebra();
B.setFastMathFlags(FMF);
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
+ Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
return B.CreateFMul(OpC->getArgOperand(1),
emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
Callee->getAttributes()), "mul");
// log(exp2(y)) -> y*log(2)
if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc::exp2)
+ TLI->has(Func) && Func == LibFunc_exp2)
return B.CreateFMul(
OpC->getArgOperand(0),
emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
@@ -1302,8 +1315,11 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
- if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
- Callee->getIntrinsicID() == Intrinsic::sqrt))
+ // TODO: Once we have a way (other than checking for the existence of the
+ // libcall) to tell whether our target can lower @llvm.sqrt, relax the
+ // condition below.
+ if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
if (!CI->hasUnsafeAlgebra())
@@ -1385,12 +1401,12 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- ((Func == LibFunc::atan && Callee->getName() == "tan") ||
- (Func == LibFunc::atanf && Callee->getName() == "tanf") ||
- (Func == LibFunc::atanl && Callee->getName() == "tanl")))
+ ((Func == LibFunc_atan && Callee->getName() == "tan") ||
+ (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc_atanl && Callee->getName() == "tanl")))
Ret = OpC->getArgOperand(0);
return Ret;
}
@@ -1427,7 +1443,7 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
Module *M = OrigCallee->getParent();
Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy, nullptr);
+ ResTy, ArgTy);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
@@ -1508,24 +1524,24 @@ void LibCallSimplifier::classifyArgUse(
return;
Function *Callee = CI->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
!isTrigLibCall(CI))
return;
if (IsFloat) {
- if (Func == LibFunc::sinpif)
+ if (Func == LibFunc_sinpif)
SinCalls.push_back(CI);
- else if (Func == LibFunc::cospif)
+ else if (Func == LibFunc_cospif)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospif_stret)
+ else if (Func == LibFunc_sincospif_stret)
SinCosCalls.push_back(CI);
} else {
- if (Func == LibFunc::sinpi)
+ if (Func == LibFunc_sinpi)
SinCalls.push_back(CI);
- else if (Func == LibFunc::cospi)
+ else if (Func == LibFunc_cospi)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospi_stret)
+ else if (Func == LibFunc_sincospi_stret)
SinCosCalls.push_back(CI);
}
}
@@ -1609,7 +1625,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
// Proceedings of PACT'98, Oct. 1998, IEEE
if (!CI->hasFnAttr(Attribute::Cold) &&
isReportingError(Callee, CI, StreamArg)) {
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
}
return nullptr;
@@ -1699,7 +1715,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
// printf(format, ...) -> iprintf(format, ...) if no floating point
// arguments.
- if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *IPrintFFn =
M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
@@ -1780,7 +1796,7 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
// sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
// point arguments.
- if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *SIPrintFFn =
M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
@@ -1850,7 +1866,7 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
// fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
// floating point arguments.
- if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *FIPrintFFn =
M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
@@ -1929,7 +1945,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
}
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
- LibFunc::Func Func;
+ LibFunc Func;
SmallString<20> FloatFuncName = FuncName;
FloatFuncName += 'f';
if (TLI->getLibFunc(FloatFuncName, Func))
@@ -1939,7 +1955,7 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
IRBuilder<> &Builder) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
// Check for string/memory library functions.
if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
@@ -1948,51 +1964,51 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
isCallingConvCCompatible(CI)) &&
"Optimizing string/memory libcall would change the calling convention");
switch (Func) {
- case LibFunc::strcat:
+ case LibFunc_strcat:
return optimizeStrCat(CI, Builder);
- case LibFunc::strncat:
+ case LibFunc_strncat:
return optimizeStrNCat(CI, Builder);
- case LibFunc::strchr:
+ case LibFunc_strchr:
return optimizeStrChr(CI, Builder);
- case LibFunc::strrchr:
+ case LibFunc_strrchr:
return optimizeStrRChr(CI, Builder);
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
return optimizeStrCmp(CI, Builder);
- case LibFunc::strncmp:
+ case LibFunc_strncmp:
return optimizeStrNCmp(CI, Builder);
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
return optimizeStrCpy(CI, Builder);
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
return optimizeStpCpy(CI, Builder);
- case LibFunc::strncpy:
+ case LibFunc_strncpy:
return optimizeStrNCpy(CI, Builder);
- case LibFunc::strlen:
+ case LibFunc_strlen:
return optimizeStrLen(CI, Builder);
- case LibFunc::strpbrk:
+ case LibFunc_strpbrk:
return optimizeStrPBrk(CI, Builder);
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
return optimizeStrTo(CI, Builder);
- case LibFunc::strspn:
+ case LibFunc_strspn:
return optimizeStrSpn(CI, Builder);
- case LibFunc::strcspn:
+ case LibFunc_strcspn:
return optimizeStrCSpn(CI, Builder);
- case LibFunc::strstr:
+ case LibFunc_strstr:
return optimizeStrStr(CI, Builder);
- case LibFunc::memchr:
+ case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
- case LibFunc::memcpy:
+ case LibFunc_memcpy:
return optimizeMemCpy(CI, Builder);
- case LibFunc::memmove:
+ case LibFunc_memmove:
return optimizeMemMove(CI, Builder);
- case LibFunc::memset:
+ case LibFunc_memset:
return optimizeMemSet(CI, Builder);
default:
break;
@@ -2005,7 +2021,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (CI->isNoBuiltin())
return nullptr;
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
@@ -2029,8 +2045,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizePow(CI, Builder);
case Intrinsic::exp2:
return optimizeExp2(CI, Builder);
- case Intrinsic::fabs:
- return optimizeFabs(CI, Builder);
case Intrinsic::log:
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
@@ -2067,114 +2081,117 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
return V;
switch (Func) {
- case LibFunc::cosf:
- case LibFunc::cos:
- case LibFunc::cosl:
+ case LibFunc_cosf:
+ case LibFunc_cos:
+ case LibFunc_cosl:
return optimizeCos(CI, Builder);
- case LibFunc::sinpif:
- case LibFunc::sinpi:
- case LibFunc::cospif:
- case LibFunc::cospi:
+ case LibFunc_sinpif:
+ case LibFunc_sinpi:
+ case LibFunc_cospif:
+ case LibFunc_cospi:
return optimizeSinCosPi(CI, Builder);
- case LibFunc::powf:
- case LibFunc::pow:
- case LibFunc::powl:
+ case LibFunc_powf:
+ case LibFunc_pow:
+ case LibFunc_powl:
return optimizePow(CI, Builder);
- case LibFunc::exp2l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
return optimizeExp2(CI, Builder);
- case LibFunc::fabsf:
- case LibFunc::fabs:
- case LibFunc::fabsl:
- return optimizeFabs(CI, Builder);
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
- case LibFunc::sqrtl:
+ case LibFunc_fabsf:
+ case LibFunc_fabs:
+ case LibFunc_fabsl:
+ return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtl:
return optimizeSqrt(CI, Builder);
- case LibFunc::ffs:
- case LibFunc::ffsl:
- case LibFunc::ffsll:
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
return optimizeFFS(CI, Builder);
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
return optimizeFls(CI, Builder);
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
return optimizeAbs(CI, Builder);
- case LibFunc::isdigit:
+ case LibFunc_isdigit:
return optimizeIsDigit(CI, Builder);
- case LibFunc::isascii:
+ case LibFunc_isascii:
return optimizeIsAscii(CI, Builder);
- case LibFunc::toascii:
+ case LibFunc_toascii:
return optimizeToAscii(CI, Builder);
- case LibFunc::printf:
+ case LibFunc_printf:
return optimizePrintF(CI, Builder);
- case LibFunc::sprintf:
+ case LibFunc_sprintf:
return optimizeSPrintF(CI, Builder);
- case LibFunc::fprintf:
+ case LibFunc_fprintf:
return optimizeFPrintF(CI, Builder);
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
return optimizeFWrite(CI, Builder);
- case LibFunc::fputs:
+ case LibFunc_fputs:
return optimizeFPuts(CI, Builder);
- case LibFunc::log:
- case LibFunc::log10:
- case LibFunc::log1p:
- case LibFunc::log2:
- case LibFunc::logb:
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log1p:
+ case LibFunc_log2:
+ case LibFunc_logb:
return optimizeLog(CI, Builder);
- case LibFunc::puts:
+ case LibFunc_puts:
return optimizePuts(CI, Builder);
- case LibFunc::tan:
- case LibFunc::tanf:
- case LibFunc::tanl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
return optimizeTan(CI, Builder);
- case LibFunc::perror:
+ case LibFunc_perror:
return optimizeErrorReporting(CI, Builder);
- case LibFunc::vfprintf:
- case LibFunc::fiprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_fiprintf:
return optimizeErrorReporting(CI, Builder, 0);
- case LibFunc::fputc:
+ case LibFunc_fputc:
return optimizeErrorReporting(CI, Builder, 1);
- case LibFunc::ceil:
- case LibFunc::floor:
- case LibFunc::rint:
- case LibFunc::round:
- case LibFunc::nearbyint:
- case LibFunc::trunc:
- if (hasFloatVersion(FuncName))
- return optimizeUnaryDoubleFP(CI, Builder, false);
- return nullptr;
- case LibFunc::acos:
- case LibFunc::acosh:
- case LibFunc::asin:
- case LibFunc::asinh:
- case LibFunc::atan:
- case LibFunc::atanh:
- case LibFunc::cbrt:
- case LibFunc::cosh:
- case LibFunc::exp:
- case LibFunc::exp10:
- case LibFunc::expm1:
- case LibFunc::sin:
- case LibFunc::sinh:
- case LibFunc::tanh:
+ case LibFunc_ceil:
+ return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
+ case LibFunc_floor:
+ return replaceUnaryCall(CI, Builder, Intrinsic::floor);
+ case LibFunc_round:
+ return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_nearbyint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
+ case LibFunc_rint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::rint);
+ case LibFunc_trunc:
+ return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
+ case LibFunc_acos:
+ case LibFunc_acosh:
+ case LibFunc_asin:
+ case LibFunc_asinh:
+ case LibFunc_atan:
+ case LibFunc_atanh:
+ case LibFunc_cbrt:
+ case LibFunc_cosh:
+ case LibFunc_exp:
+ case LibFunc_exp10:
+ case LibFunc_expm1:
+ case LibFunc_sin:
+ case LibFunc_sinh:
+ case LibFunc_tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
- case LibFunc::copysign:
+ case LibFunc_copysign:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
- case LibFunc::fminf:
- case LibFunc::fmin:
- case LibFunc::fminl:
- case LibFunc::fmaxf:
- case LibFunc::fmax:
- case LibFunc::fmaxl:
+ case LibFunc_fminf:
+ case LibFunc_fmin:
+ case LibFunc_fminl:
+ case LibFunc_fmaxf:
+ case LibFunc_fmax:
+ case LibFunc_fmaxl:
return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
@@ -2211,16 +2228,10 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * log(exp10(y)) -> y*log(10)
// * log(sqrt(x)) -> 0.5*log(x)
//
-// lround, lroundf, lroundl:
-// * lround(cnst) -> cnst'
-//
// pow, powf, powl:
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
-// round, roundf, roundl:
-// * round(cnst) -> cnst'
-//
// signbit:
// * signbit(cnst) -> cnst'
// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
@@ -2230,10 +2241,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// trunc, truncf, truncl:
-// * trunc(cnst) -> cnst'
-//
-//
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
@@ -2300,7 +2307,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
- LibFunc::Func Func) {
+ LibFunc Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
const DataLayout &DL = CI->getModule()->getDataLayout();
@@ -2308,7 +2315,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
*ObjSize = CI->getArgOperand(2);
// __stpcpy_chk(x,x,...) -> x+strlen(x)
- if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+ if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
@@ -2334,14 +2341,14 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
// a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc::stpcpy_chk)
+ if (Ret && Func == LibFunc_stpcpy_chk)
return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
return Ret;
}
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
- LibFunc::Func Func) {
+ LibFunc Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2366,7 +2373,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
//
// PR23093.
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
SmallVector<OperandBundleDef, 2> OpBundles;
@@ -2384,17 +2391,17 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
switch (Func) {
- case LibFunc::memcpy_chk:
+ case LibFunc_memcpy_chk:
return optimizeMemCpyChk(CI, Builder);
- case LibFunc::memmove_chk:
+ case LibFunc_memmove_chk:
return optimizeMemMoveChk(CI, Builder);
- case LibFunc::memset_chk:
+ case LibFunc_memset_chk:
return optimizeMemSetChk(CI, Builder);
- case LibFunc::stpcpy_chk:
- case LibFunc::strcpy_chk:
+ case LibFunc_stpcpy_chk:
+ case LibFunc_strcpy_chk:
return optimizeStrpCpyChk(CI, Builder, Func);
- case LibFunc::stpncpy_chk:
- case LibFunc::strncpy_chk:
+ case LibFunc_stpncpy_chk:
+ case LibFunc_strncpy_chk:
return optimizeStrpNCpyChk(CI, Builder, Func);
default:
break;
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 7b9de2eadc61..7106483c3bd2 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -35,9 +35,8 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeUnifyFunctionExitNodesPass(Registry);
initializeInstSimplifierPass(Registry);
initializeMetaRenamerPass(Registry);
- initializeMemorySSAWrapperPassPass(Registry);
- initializeMemorySSAPrinterLegacyPassPass(Registry);
initializeStripGCRelocatesPass(Registry);
+ initializePredicateInfoPrinterLegacyPassPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
new file mode 100644
index 000000000000..4aeea02b1b1b
--- /dev/null
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -0,0 +1,482 @@
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vncoerce"
+namespace llvm {
+namespace VNCoercion {
+
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first-class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+template <class T, class HelperClass>
+static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ "precondition violation - materialization can't fail");
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ // If this is already the right type, just return it.
+ Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoredValSize == LoadedValSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy()) {
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ } else {
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+ }
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoredValSize >= LoadedValSize &&
+ "canCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
+ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (DL.isBigEndian()) {
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+ DL.getTypeStoreSizeInBits(LoadedTy);
+ StoredVal = Helper.CreateLShr(
+ StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
+ }
+
+ // Truncate the integer to the right size now.
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
+ StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
+
+ if (LoadedTy != NewIntTy) {
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ else
+ // Otherwise, bitcast.
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+}
+
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// IRB is IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilder<> &IRB, const DataLayout &DL) {
+ return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
+}
+
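What coerceAvailableValueToLoadType does, reduced to a host-level sketch (a hypothetical example, little-endian host assumed): a store of a wider value can satisfy a narrower load of a different type by reinterpreting the bits and truncating, which is the bitcast/ptrtoint/trunc sequence the helper emits in IR.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double stored = 3.141592653589793;

  // What the original load would observe: the low 4 bytes of the stored value.
  uint32_t loaded;
  std::memcpy(&loaded, &stored, sizeof loaded);

  // What the coercion produces: reinterpret the full value as an integer, then
  // truncate to the load's width (no shift is needed on a little-endian host).
  uint64_t bits;
  std::memcpy(&bits, &stored, sizeof bits);
  uint32_t coerced = static_cast<uint32_t>(bits);

  std::printf("%u %u\n", loaded, coerced);   // the two values match
}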
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering memory write (store, memset, memcpy, memmove). This
+/// means that the write *may* provide bits used by the load but we can't be
+/// sure because the pointers don't must-alias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+ LoadSize /= 8;
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure)
+ return -1;
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset + StoreSize < LoadOffset + LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset - StoreOffset;
+}
+
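The containment test above on concrete numbers, a sketch with made-up offsets rather than real analysis results: a store of 8 bytes at base+0 fully covers a 4-byte load at base+4, and the returned byte offset is 4.

#include <cassert>
#include <cstdint>

// Mirror of the containment check and offset computation above.
static int64_t loadOffsetInStore(int64_t StoreOffset, int64_t StoreSize,
                                 int64_t LoadOffset, int64_t LoadSize) {
  if (StoreOffset > LoadOffset ||
      StoreOffset + StoreSize < LoadOffset + LoadSize)
    return -1;                       // load not fully covered by the store
  return LoadOffset - StoreOffset;   // byte offset of the load inside the store
}

int main() {
  assert(loadOffsetInStore(0, 8, 4, 4) == 4);   // i64 store feeds an i32 load at +4
  assert(loadOffsetInStore(0, 4, 2, 4) == -1);  // load spills past the store
}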
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI, const DataLayout &DL) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =
+ DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
+ DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+ const DataLayout &DL) {
+  // Cannot handle reading from a load of a first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+ if (R != -1)
+ return R;
+
+  // If we have a load/load clobber and DepLI can be widened to cover this
+  // load, then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
+ if (Size == 0)
+ return -1;
+
+  // Check the non-obvious conditions enforced by MDA that we rely on to be
+  // able to materialize this potentially available value.
+ assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+}
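+// For example, an i32 load from 'base' clobbering an i16 load from 'base + 2'
+// yields offset 2 directly; when the queried bytes extend past the end of the
+// clobbering load, we only succeed if MemoryDependenceResults reports that the
+// clobbering load may be widened far enough to cover them.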
+
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI, const DataLayout &DL) {
+  // If the mem operation has a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (!SizeCst)
+ return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+
+  // If this is a memset, we just need to see if the offset is valid within the
+  // size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (!Src)
+ return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+ if (!GV || !GV->isConstant())
+ return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+ if (Offset == -1)
+ return Offset;
+
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+ return Offset;
+ return -1;
+}
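+// For example, memset(p, c, 16) clobbering a load of an i32 from 'p + 8'
+// yields offset 8, while a memcpy/memmove is only handled when its source is
+// (an offset into) a constant global from which the load can be
+// constant-folded.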
+
+template <class T, class HelperClass>
+static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (DL.isLittleEndian())
+ ShiftAmt = Offset * 8;
+ else
+ ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+ if (ShiftAmt)
+ SrcVal = Helper.CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt));
+
+ if (LoadSize != StoreSize)
+ SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
+ IntegerType::get(Ctx, LoadSize * 8));
+ return SrcVal;
+}
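+// Worked example: extracting an i16 at byte Offset 2 from an i64 store gives
+// StoreSize 8 and LoadSize 2, so the value is shifted right by 2 * 8 = 16 bits
+// on a little-endian target (or by (8 - 2 - 2) * 8 = 32 bits on a big-endian
+// one) and then truncated to i16.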
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering store. This means that the store provides bits used by
+/// the load but the pointers don't must-alias. Given the byte Offset of the
+/// load within the stored value (as computed by the analyze* routines above),
+/// extract that piece of SrcVal and coerce it to LoadTy.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+
+ IRBuilder<> Builder(InsertPt);
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
+}
+
+Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ ConstantFolder F;
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering load. This means that the clobbering load *may* provide
+/// bits used by the queried load but we can't be sure because the pointers
+/// don't must-alias. Widen SrcVal if necessary to cover the requested bytes,
+/// then extract and coerce the value as for a clobbering store.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+  // If Offset + LoadSize exceeds the size of SrcVal, then we must widen SrcVal
+  // out to a larger load.
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize) {
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+    // If we have a load/load clobber and SrcVal can be widened to cover this
+    // load, then we should widen it to the next power-of-2 size that is big
+    // enough!
+ unsigned NewLoadSize = Offset + LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ DestPTy =
+ PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (DL.isBigEndian())
+ RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ SrcVal = NewLoad;
+ }
+
+ return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
+}
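+// For example, requesting 4 bytes at Offset 4 out of an i32 load (store size
+// 4) takes the widening path: the old load is replaced by an i64 load (the
+// next power of two covering Offset + LoadSize = 8 bytes), its original value
+// is re-extracted for existing users, and the requested bytes are then pulled
+// out via getStoreValueForLoad.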
+
+Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize)
+ return nullptr;
+ return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
+}
+
+template <class T, class HelperClass>
+T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+
+  // We know that this method is only called when the mem intrinsic fully
+  // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ T *Val = cast<T>(MSI->getValue());
+ if (LoadSize != 1)
+ Val =
+ Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
+ T *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet * 2 <= LoadSize) {
+ T *ShVal = Helper.CreateShl(
+ Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
+ Val = Helper.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
+ Val = Helper.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
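+    // E.g. a memset byte of 0xAB feeding an i32 load builds 0xABAB after the
+    // first doubling step and 0xABABABAB after the second.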
+
+ return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+
+  // See if we can constant-fold a load from the constant with the offset
+  // applied as appropriate.
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &DL) {
+ IRBuilder<> Builder(InsertPt);
+ return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset,
+ LoadTy, Builder, DL);
+}
+
+Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+  // The only case in which the value found by analyzeLoadFromClobberingMemInst
+  // cannot be converted to a constant is a memset of a non-constant value.
+ if (auto *MSI = dyn_cast<MemSetInst>(SrcInst))
+ if (!isa<Constant>(MSI->getValue()))
+ return nullptr;
+ ConstantFolder F;
+ return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset,
+ LoadTy, F, DL);
+}
+} // namespace VNCoercion
+} // namespace llvm
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 0e9baaf8649d..f77c10b6dd47 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -681,6 +681,7 @@ void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
if (Optional<Metadata *> MappedOp = getMappedOp(Old))
return *MappedOp;
+ (void)D;
assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
return &G.getFwdReference(*cast<MDNode>(Old));
});