summaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-04-20 21:19:10 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-04-20 21:19:10 +0000
commitd99dafe2e4a385dd2a6c76da6d8258deb100657b (patch)
treeba60bf957558bd114f25dbff3d4996b5d7a61c82 /lib/Transforms
parent71d5a2540a98c81f5bcaeb48805e0e2881f530ef (diff)
Notes
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp15
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp40
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp15
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp40
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp23
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp20
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp77
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp239
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp4
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp2
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp99
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopLoadElimination.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp6
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp39
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp14
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp8
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp24
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp31
-rw-r--r--lib/Transforms/Utils/Local.cpp8
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp106
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp2
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp5
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp24
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp12
32 files changed, 470 insertions, 457 deletions
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 375b74c494d9..8e26849ea9e3 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -167,15 +167,12 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Drop any attributes that were on the vararg arguments.
AttributeList PAL = CS.getAttributes();
- if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
- SmallVector<AttributeList, 8> AttributesVec;
- for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
- AttributesVec.push_back(PAL.getSlotAttributes(i));
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- AttributesVec.push_back(AttributeList::get(Fn.getContext(),
- AttributeList::FunctionIndex,
- PAL.getFnAttributes()));
- PAL = AttributeList::get(Fn.getContext(), AttributesVec);
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+ ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 4d13b3f40688..9648883b7f27 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -222,15 +222,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
MadeChange = true;
// Clear out any existing attributes.
- AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
- F->removeAttributes(
- AttributeList::FunctionIndex,
- AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B));
+ F->removeFnAttr(Attribute::ReadOnly);
+ F->removeFnAttr(Attribute::ReadNone);
// Add in the new attribute.
- F->addAttribute(AttributeList::FunctionIndex,
- ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+ F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
++NumReadOnly;
@@ -495,9 +491,6 @@ determinePointerReadAttrs(Argument *A,
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- AttrBuilder B;
- B.addAttribute(Attribute::Returned);
-
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -535,7 +528,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
- A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::Returned);
++NumReturned;
Changed = true;
}
@@ -593,9 +586,6 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
ArgumentGraph AG;
- AttrBuilder B;
- B.addAttribute(Attribute::NoCapture);
-
// Check each function in turn, determining which pointer arguments are not
// captured.
for (Function *F : SCCNodes) {
@@ -614,7 +604,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -633,8 +623,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(
- AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
} else {
@@ -660,9 +649,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
- AttrBuilder B;
- B.addAttribute(R);
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(R);
Changed = true;
R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
}
@@ -687,7 +674,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -729,7 +716,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -760,15 +747,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B, R;
- B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
- A->removeAttr(
- AttributeList::get(A->getContext(), A->getArgNo() + 1, R));
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->removeAttr(Attribute::ReadOnly);
+ A->removeAttr(Attribute::ReadNone);
+ A->addAttr(ReadAttr);
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ade4f21ceb52..ae9d4ce11e0d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1979,16 +1979,11 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) {
- for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- unsigned Index = Attrs.getSlotIndex(i);
- if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
- continue;
-
- // There can be only one.
- return Attrs.removeAttribute(C, Index, Attribute::Nest);
- }
-
+static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
+ // There can be at most one attribute set with a nest attribute.
+ unsigned NestIndex;
+ if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
+ return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
return Attrs;
}
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 3371de6e3d14..e755e2bd8f26 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -208,6 +209,12 @@ protected:
/// the same number of times.
EquivalenceClassMap EquivalenceClass;
+ /// Map from function name to Function *. Used to find the function from
+ /// the function name. If the function name contains suffix, additional
+ /// entry is added to map from the stripped name to the function if there
+ /// is one-to-one mapping.
+ StringMap<Function *> SymbolMap;
+
/// \brief Dominance, post-dominance and loop information.
std::unique_ptr<DominatorTree> DT;
std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
@@ -670,7 +677,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
- (FS = findCalleeFunctionSamples(I))) {
+ !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
Candidates.push_back(&I);
if (callsiteIsHot(Samples, FS))
Hot = true;
@@ -689,7 +696,10 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
auto CalleeFunctionName = FS->getName();
const char *Reason = "Callee function not available";
- CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+ auto R = SymbolMap.find(CalleeFunctionName);
+ if (R == SymbolMap.end())
+ continue;
+ CalledFunction = R->getValue();
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
// The indirect target was promoted and inlined in the profile, as a
// result, we do not have profile info for the branch probability.
@@ -1181,8 +1191,11 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
+ DebugLoc BranchLoc = TI->getDebugLoc();
DEBUG(dbgs() << "\nGetting weights for branch at line "
- << TI->getDebugLoc().getLine() << ".\n");
+ << ((BranchLoc) ? Twine(BranchLoc.getLine())
+ : Twine("<UNKNOWN LOCATION>"))
+ << ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
DebugLoc MaxDestLoc;
@@ -1219,7 +1232,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
- DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
@@ -1414,6 +1426,26 @@ bool SampleProfileLoader::runOnModule(Module &M) {
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
+ // Populate the symbol map.
+ for (const auto &N_F : M.getValueSymbolTable()) {
+ std::string OrigName = N_F.getKey();
+ Function *F = dyn_cast<Function>(N_F.getValue());
+ if (F == nullptr)
+ continue;
+ SymbolMap[OrigName] = F;
+ auto pos = OrigName.find('.');
+ if (pos != std::string::npos) {
+ std::string NewName = OrigName.substr(0, pos);
+ auto r = SymbolMap.insert(std::make_pair(NewName, F));
+ // Failiing to insert means there is already an entry in SymbolMap,
+ // thus there are multiple functions that are mapped to the same
+ // stripped name. In this case of name conflicting, set the value
+ // to nullptr to avoid confusion.
+ if (!r.second)
+ r.first->second = nullptr;
+ }
+ }
+
bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 65deb82cd2a5..9801a0a61416 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -363,6 +363,7 @@ void splitAndWriteThinLTOBitcode(
W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/true, &ModHash);
W.writeModule(MergedM.get());
+ W.writeStrtab();
OS << Buffer;
// If a minimized bitcode module was requested for the thin link,
@@ -375,6 +376,7 @@ void splitAndWriteThinLTOBitcode(
W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/false, &ModHash);
W2.writeModule(MergedM.get());
+ W2.writeStrtab();
*ThinLinkOS << Buffer;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 174ec8036274..e30a4bafb9b0 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1044,14 +1044,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
const APInt *RHSC;
if (match(RHS, m_APInt(RHSC))) {
- if (RHSC->isSignBit()) {
+ if (RHSC->isSignMask()) {
// If wrapping is not allowed, then the addition must set the sign bit:
- // X + (signbit) --> X | signbit
+ // X + (signmask) --> X | signmask
if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap())
return BinaryOperator::CreateOr(LHS, RHS);
// If wrapping is allowed, then the addition flips the sign bit of LHS:
- // X + (signbit) --> X ^ signbit
+ // X + (signmask) --> X ^ signmask
return BinaryOperator::CreateXor(LHS, RHS);
}
@@ -1120,9 +1120,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
- // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
- // transform them into (X + (signbit ^ C))
- if (XorRHS->getValue().isSignBit())
+ // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
+ // transform them into (X + (signmask ^ C))
+ if (XorRHS->getValue().isSignMask())
return BinaryOperator::CreateAdd(XorLHS,
ConstantExpr::getXor(XorRHS, CI));
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b2a41c699202..3a98e8937bda 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2078,7 +2078,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *NOr = Builder->CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
- cast<Instruction>(Op0)->getOperand(1));
+ ConstantInt::get(NOr->getType(), *C));
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
@@ -2087,7 +2087,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *NOr = Builder->CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
- cast<Instruction>(Op1)->getOperand(1));
+ ConstantInt::get(NOr->getType(), *C));
}
}
@@ -2480,8 +2480,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
return BinaryOperator::CreateSub(SubOne(NegOp0CI),
Op0I->getOperand(0));
- } else if (RHSC->getValue().isSignBit()) {
- // (X + C) ^ signbit -> (X + C + signbit)
+ } else if (RHSC->getValue().isSignMask()) {
+ // (X + C) ^ signmask -> (X + C + signmask)
Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 69484f47223f..e7aa1a457371 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -839,7 +839,8 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
// Length bits.
if (CI0) {
APInt Elt = CI0->getValue();
- Elt = Elt.lshr(Index).zextOrTrunc(Length);
+ Elt.lshrInPlace(Index);
+ Elt = Elt.zextOrTrunc(Length);
return LowConstantHighUndef(Elt.getZExtValue());
}
@@ -1036,7 +1037,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
// The PD variants uses bit 1 to select per-lane element index, so
// shift down to convert to generic shuffle mask index.
if (IsPD)
- Index = Index.lshr(1);
+ Index.lshrInPlace(1);
// The _256 variants are a bit trickier since the mask bits always index
// into the corresponding 128 half. In order to convert to a generic
@@ -4067,21 +4068,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
- !CallerPAL.isEmpty())
+ !CallerPAL.isEmpty()) {
// In this case we have more arguments than the new function type, but we
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
- for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
- unsigned Index = CallerPAL.getSlotIndex(i - 1);
- if (Index <= FT->getNumParams())
- break;
-
- // Check if it has an attribute that's incompatible with varargs.
- AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1);
- if (PAttrs.hasAttribute(Index, Attribute::StructRet))
- return false;
- }
-
+ unsigned SRetIdx;
+ if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
+ SRetIdx > FT->getNumParams())
+ return false;
+ }
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 25683132c786..9127ddca5915 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1591,7 +1591,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// GEP into CI would undo canonicalizing addrspacecast with different
// pointer types, causing infinite loops.
(!isa<AddrSpaceCastInst>(CI) ||
- GEP->getType() == GEP->getPointerOperand()->getType())) {
+ GEP->getType() == GEP->getPointerOperandType())) {
// Changing the cast operand is usually not a good idea but it is safe
// here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bbafa9e9f468..003029ae39d5 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -140,7 +140,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS.isSignBit();
+ return RHS.isSignMask();
default:
return false;
}
@@ -1532,14 +1532,14 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
if (Xor->hasOneUse()) {
- // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit))
- if (!Cmp.isEquality() && XorC->isSignBit()) {
+ // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
+ if (!Cmp.isEquality() && XorC->isSignMask()) {
Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
: Cmp.getSignedPredicate();
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC));
}
- // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit))
+ // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
: Cmp.getSignedPredicate();
@@ -2402,9 +2402,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
const APInt &Upper = CR.getUpper();
const APInt &Lower = CR.getLower();
if (Cmp.isSigned()) {
- if (Lower.isSignBit())
+ if (Lower.isSignMask())
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
- if (Upper.isSignBit())
+ if (Upper.isSignMask())
return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
} else {
if (Lower.isMinValue())
@@ -2604,7 +2604,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
break;
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->isSignBit()) {
+ if (BOC->isSignMask()) {
Constant *Zero = Constant::getNullValue(BOp0->getType());
auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(NewPred, BOp0, Zero);
@@ -3032,9 +3032,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
BO1->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
+ if (CI->getValue().isSignMask()) {
ICmpInst::Predicate Pred =
I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
@@ -3797,7 +3797,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth,
bool isSignCheck) {
if (isSignCheck)
- return APInt::getSignBit(BitWidth);
+ return APInt::getSignMask(BitWidth);
ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
if (!CI) return APInt::getAllOnesValue(BitWidth);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6288e054f1bc..675553017838 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -931,6 +931,18 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
return nullptr;
}
+static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0)
+ return true;
+ }
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0))
+ return true;
+ return false;
+}
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -979,27 +991,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (!LI.isUnordered()) return nullptr;
// load(gep null, ...) -> unreachable
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
- const Value *GEPI0 = GEPI->getOperand(0);
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
- // Insert a new store to null instruction before the load to indicate
- // that this code is not reachable. We do this instead of inserting
- // an unreachable instruction directly because we cannot modify the
- // CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
- }
-
// load null/undef -> unreachable
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<UndefValue>(Op) ||
- (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
- // Insert a new store to null instruction before the load to indicate that
- // this code is not reachable. We do this instead of inserting an
- // unreachable instruction directly because we cannot modify the CFG.
+ // TODO: Consider a target hook for valid address spaces for this xforms.
+ if (canSimplifyNullLoadOrGEP(LI, Op)) {
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
new StoreInst(UndefValue::get(LI.getType()),
Constant::getNullValue(Op->getType()), &LI);
return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f1ac82057e6c..ce66581a491a 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -944,22 +944,21 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
- if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) {
- if (One->isOne() && !I.getType()->isIntegerTy(1)) {
- bool isSigned = I.getOpcode() == Instruction::SDiv;
- if (isSigned) {
- // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
- // result is one, if Op1 is -1 then the result is minus one, otherwise
- // it's zero.
- Value *Inc = Builder->CreateAdd(Op1, One);
- Value *Cmp = Builder->CreateICmpULT(
- Inc, ConstantInt::get(I.getType(), 3));
- return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
- } else {
- // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
- // result is one, otherwise it's zero.
- return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType());
- }
+ if (match(Op0, m_One())) {
+ assert(!I.getType()->getScalarType()->isIntegerTy(1) &&
+ "i1 divide not removed?");
+ if (I.getOpcode() == Instruction::SDiv) {
+ // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
+ // result is one, if Op1 is -1 then the result is minus one, otherwise
+ // it's zero.
+ Value *Inc = Builder->CreateAdd(Op1, Op0);
+ Value *Cmp = Builder->CreateICmpULT(
+ Inc, ConstantInt::get(I.getType(), 3));
+ return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
+ } else {
+ // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
+ // result is one, otherwise it's zero.
+ return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType());
}
}
@@ -1238,25 +1237,23 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
- if (I.getType()->isIntegerTy()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
- if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
- // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- BO->setIsExact(I.isExact());
- return BO;
- }
+ APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
- // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
- // Safe because the only negative value (1 << Y) can take on is
- // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
- // the sign bit set.
- auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- BO->setIsExact(I.isExact());
- return BO;
- }
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
}
}
@@ -1546,13 +1543,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
- if (I.getType()->isIntegerTy()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
- MaskedValueIsZero(Op0, Mask, 0, &I)) {
- // X srem Y -> X urem Y, iff X and Y don't have sign bit set
- return BinaryOperator::CreateURem(Op0, Op1, I.getName());
- }
+ APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
+ MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
// If it's a constant vector, flip any negative values positive.
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 693b6c95c169..5d6d899da4b5 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -618,7 +618,7 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
{
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
- APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);
Value *X;
const APInt *Y, *C;
bool TrueWhenUnset;
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 9aa679c60e47..f77d713b9b07 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -370,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
MaskV <<= Op1C->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(Op1C->getZExtValue());
+ MaskV.lshrInPlace(Op1C->getZExtValue());
}
// shift1 & 0x00FF
@@ -760,7 +760,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
}
// See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I))
+ if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);
return nullptr;
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 4e6f02058d83..2ba052b7e02d 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -38,7 +38,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
// If there are no bits set that aren't demanded, nothing to do.
Demanded = Demanded.zextOrTrunc(C->getBitWidth());
- if ((~Demanded & *C) == 0)
+ if (C->isSubsetOf(Demanded))
return false;
// This instruction is producing bits that are not demanded. Shrink the RHS.
@@ -117,27 +117,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne.getBitWidth() == BitWidth &&
"Value *V, DemandedMask, KnownZero and KnownOne "
"must have same BitWidth");
- const APInt *C;
- if (match(V, m_APInt(C))) {
- // We know all of the bits for a scalar constant or a splat vector constant!
- KnownOne = *C & DemandedMask;
- KnownZero = ~KnownOne & DemandedMask;
- return nullptr;
- }
- if (isa<ConstantPointerNull>(V)) {
- // We know all of the bits for a constant!
- KnownOne.clearAllBits();
- KnownZero = DemandedMask;
+
+ if (isa<Constant>(V)) {
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
return nullptr;
}
KnownZero.clearAllBits();
KnownOne.clearAllBits();
- if (DemandedMask == 0) { // Not demanding any bits from V.
- if (isa<UndefValue>(V))
- return nullptr;
+ if (DemandedMask == 0) // Not demanding any bits from V.
return UndefValue::get(VTy);
- }
if (Depth == 6) // Limit search depth.
return nullptr;
@@ -187,16 +176,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
@@ -224,25 +211,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
+ if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
@@ -271,20 +247,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(LHSKnownZero))
return I->getOperand(1);
// If all of the demanded bits are known to be zero on one side or the
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) {
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
@@ -295,14 +271,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
- // all known
- if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
- Constant *AndC = Constant::getIntegerValue(VTy,
- ~RHSKnownOne & DemandedMask);
- Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
- return InsertNewInstWith(And, *I);
- }
+ if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) &&
+ RHSKnownOne.isSubsetOf(LHSKnownOne)) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ return InsertNewInstWith(And, *I);
}
// If the RHS is a constant, see if we can simplify it.
@@ -529,9 +503,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownZero.setLowBits(ShiftAmt);
}
break;
- case Instruction::LShr:
- // For a logical shift right
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ case Instruction::LShr: {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Unsigned shift right.
@@ -546,13 +520,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShiftAmt);
- KnownOne = KnownOne.lshr(ShiftAmt);
+ KnownZero.lshrInPlace(ShiftAmt);
+ KnownOne.lshrInPlace(ShiftAmt);
if (ShiftAmt)
KnownZero.setHighBits(ShiftAmt); // high bits known zero.
}
break;
- case Instruction::AShr:
+ }
+ case Instruction::AShr: {
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
@@ -566,15 +541,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the sign bit is the only bit demanded by this ashr, then there is no
// need to do it, the shift doesn't change the high bit.
- if (DemandedMask.isSignBit())
+ if (DemandedMask.isSignMask())
return I->getOperand(0);
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
- // If any of the "high bits" are demanded, we should set the sign bit as
+ // If any of the high bits are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
DemandedMaskIn.setSignBit();
@@ -587,31 +563,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
Depth + 1))
return I;
+
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero = KnownZero.lshr(ShiftAmt);
- KnownOne = KnownOne.lshr(ShiftAmt);
+ KnownZero.lshrInPlace(ShiftAmt);
+ KnownOne.lshrInPlace(ShiftAmt);
// Handle the sign bits.
- APInt SignBit(APInt::getSignBit(BitWidth));
+ APInt SignMask(APInt::getSignMask(BitWidth));
// Adjust to where it is now in the mask.
- SignBit = SignBit.lshr(ShiftAmt);
+ SignMask.lshrInPlace(ShiftAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
- // Perform the logical shift right.
- BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
- SA, I->getName());
- NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
- return InsertNewInstWith(NewVal, *I);
- } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
+ I->getOperand(1));
+ LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
+ return InsertNewInstWith(LShr, *I);
+ } else if ((KnownOne & SignMask) != 0) { // New bits are known one.
KnownOne |= HighBits;
}
}
break;
+ }
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
// X % -1 demands all the bits because we don't want to introduce
@@ -624,7 +601,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne,
Depth + 1))
return I;
@@ -635,26 +612,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits))
+ if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0))
+ if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0))
KnownOne |= ~LowBits;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ if (DemandedMask.isSignBitSet()) {
computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isNegative())
+ if (LHSKnownZero.isSignBitSet())
KnownZero.setSignBit();
}
break;
@@ -744,7 +721,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
return Constant::getIntegerValue(VTy, KnownOne);
return nullptr;
}
@@ -783,17 +760,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
return I->getOperand(1);
KnownZero = std::move(IKnownZero);
@@ -817,26 +792,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
+ if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
return I->getOperand(1);
KnownZero = std::move(IKnownZero);
@@ -861,14 +825,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the
// other.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(LHSKnownZero))
return I->getOperand(1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
@@ -883,7 +847,7 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If this user is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
return Constant::getIntegerValue(ITy, KnownOne);
break;
@@ -1641,7 +1605,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts.setHighBits(VWidth / 2);
break;
case Intrinsic::amdgcn_buffer_load:
- case Intrinsic::amdgcn_buffer_load_format: {
+ case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_getlod: {
if (VWidth == 1 || !DemandedElts.isMask())
return nullptr;
@@ -1656,8 +1665,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *NewTy = (NewNumElts == 1) ? EltTy :
VectorType::get(EltTy, NewNumElts);
- Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
- NewTy);
+ auto IID = II->getIntrinsicID();
+
+ bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
+ IID == Intrinsic::amdgcn_buffer_load_format;
+
+ Function *NewIntrin = IsBuffer ?
+ Intrinsic::getDeclaration(M, IID, NewTy) :
+ // Samplers have 3 mangled types.
+ Intrinsic::getDeclaration(M, IID,
+ { NewTy, II->getArgOperand(0)->getType(),
+ II->getArgOperand(1)->getType()});
SmallVector<Value *, 5> Args;
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
@@ -1669,6 +1687,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);
+
+ if (!IsBuffer) {
+ ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
+ if (DMask) {
+ unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+
+ unsigned PopCnt = 0;
+ unsigned NewDMask = 0;
+ for (unsigned I = 0; I < 4; ++I) {
+ const unsigned Bit = 1 << I;
+ if (!!(DMaskVal & Bit)) {
+ if (++PopCnt > NewNumElts)
+ break;
+
+ NewDMask |= Bit;
+ }
+ }
+
+ NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
+ }
+ }
+
+
if (NewNumElts == 1) {
return Builder->CreateInsertElement(UndefValue::get(V->getType()),
NewCall, static_cast<uint64_t>(0));
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 88ef17bbc8fa..81f2d9fa179f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -148,9 +148,9 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
bool Overflow = false;
if (Opcode == Instruction::Add)
- BVal->sadd_ov(*CVal, Overflow);
+ (void)BVal->sadd_ov(*CVal, Overflow);
else
- BVal->ssub_ov(*CVal, Overflow);
+ (void)BVal->ssub_ov(*CVal, Overflow);
return !Overflow;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 94cfc69ed555..036dd8d39a08 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2586,7 +2586,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
- replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false);
AI->replaceAllUsesWith(NewAllocaPtr);
}
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index fa0c7cc5a4c5..8bdd917a0596 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -59,13 +59,8 @@ using namespace llvm;
static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const SanCovName = "__sanitizer_cov";
static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check";
-static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const SanCovTracePCIndirName =
"__sanitizer_cov_trace_pc_indir";
-static const char *const SanCovTraceEnterName =
- "__sanitizer_cov_trace_func_enter";
-static const char *const SanCovTraceBBName =
- "__sanitizer_cov_trace_basic_block";
static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
@@ -86,8 +81,7 @@ static const char *const SanCovTracePCGuardInitName =
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
- "3: all blocks and critical edges, "
- "4: above plus indirect calls"),
+ "3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
static cl::opt<unsigned> ClCoverageBlockThreshold(
@@ -96,12 +90,6 @@ static cl::opt<unsigned> ClCoverageBlockThreshold(
" more than this number of blocks."),
cl::Hidden, cl::init(0));
-static cl::opt<bool>
- ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
- cl::desc("Experimental basic-block tracing: insert "
- "callbacks at every basic block"),
- cl::Hidden, cl::init(false));
-
static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
cl::desc("Experimental pc tracing"),
cl::Hidden, cl::init(false));
@@ -128,16 +116,6 @@ static cl::opt<bool>
cl::desc("Reduce the number of instrumented blocks"),
cl::Hidden, cl::init(true));
-// Experimental 8-bit counters used as an additional search heuristic during
-// coverage-guided fuzzing.
-// The counters are not thread-friendly:
-// - contention on these counters may cause significant slowdown;
-// - the counter updates are racy and the results may be inaccurate.
-// They are also inaccurate due to 8-bit integer overflow.
-static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters",
- cl::desc("Experimental 8-bit counters"),
- cl::Hidden, cl::init(false));
-
namespace {
SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
@@ -168,11 +146,9 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
Options.IndirectCalls |= CLOpts.IndirectCalls;
- Options.TraceBB |= ClExperimentalTracing;
Options.TraceCmp |= ClCMPTracing;
Options.TraceDiv |= ClDIVTracing;
Options.TraceGep |= ClGEPTracing;
- Options.Use8bitCounters |= ClUse8bitCounters;
Options.TracePC |= ClExperimentalTracePC;
Options.TracePCGuard |= ClTracePCGuard;
return Options;
@@ -212,16 +188,15 @@ private:
bool UseCalls);
unsigned NumberOfInstrumentedBlocks() {
return SanCovFunction->getNumUses() +
- SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
- SanCovTraceEnter->getNumUses();
+ SanCovWithCheckFunction->getNumUses();
}
StringRef getSanCovTracePCGuardSection() const;
StringRef getSanCovTracePCGuardSectionStart() const;
StringRef getSanCovTracePCGuardSectionEnd() const;
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
- Function *SanCovIndirCallFunction, *SanCovTracePCIndir;
- Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard;
+ Function *SanCovTracePCIndir;
+ Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
Function *SanCovTraceDivFunction[2];
Function *SanCovTraceGepFunction;
@@ -235,7 +210,6 @@ private:
GlobalVariable *GuardArray;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
- GlobalVariable *EightBitCounterArray;
bool HasSancovGuardsSection;
SanitizerCoverageOptions Options;
@@ -267,9 +241,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy));
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
- SanCovIndirCallFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy));
SanCovTraceCmpFunction[0] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
@@ -305,24 +276,15 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(SanCovTracePCName, VoidTy));
SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTracePCGuardName, VoidTy, Int32PtrTy));
- SanCovTraceEnter = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy));
- SanCovTraceBB = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy));
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
Type *Int32Ty = IRB.getInt32Ty();
- Type *Int8Ty = IRB.getInt8Ty();
if (!Options.TracePCGuard)
GuardArray =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, "__sancov_gen_cov_tmp");
- if (Options.Use8bitCounters)
- EightBitCounterArray =
- new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, "__sancov_gen_cov_tmp");
for (auto &F : M)
runOnFunction(F);
@@ -344,20 +306,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
GuardArray->eraseFromParent();
}
- GlobalVariable *RealEightBitCounterArray;
- if (Options.Use8bitCounters) {
- // Make sure the array is 16-aligned.
- static const int CounterAlignment = 16;
- Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment));
- RealEightBitCounterArray = new GlobalVariable(
- M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter");
- RealEightBitCounterArray->setAlignment(CounterAlignment);
- EightBitCounterArray->replaceAllUsesWith(
- IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy));
- EightBitCounterArray->eraseFromParent();
- }
-
// Create variable for module (compilation unit) name
Constant *ModNameStrConst =
ConstantDataArray::getString(M.getContext(), M.getName(), true);
@@ -396,10 +344,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M, SanCovModuleCtorName, SanCovModuleInitName,
{Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
{IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, N),
- Options.Use8bitCounters
- ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)
- : Constant::getNullValue(Int8PtrTy),
+ ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy),
IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
@@ -566,26 +511,15 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- const int CacheSize = 16;
- const int CacheAlignment = 64; // Align for better performance.
- Type *Ty = ArrayType::get(IntptrTy, CacheSize);
+ if (!Options.TracePC && !Options.TracePCGuard)
+ return;
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
Value *Callee = CS.getCalledValue();
if (isa<InlineAsm>(Callee))
continue;
- GlobalVariable *CalleeCache = new GlobalVariable(
- *F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
- CalleeCache->setAlignment(CacheAlignment);
- if (Options.TracePC || Options.TracePCGuard)
- IRB.CreateCall(SanCovTracePCIndir,
- IRB.CreatePointerCast(Callee, IntptrTy));
- else
- IRB.CreateCall(SanCovIndirCallFunction,
- {IRB.CreatePointerCast(Callee, IntptrTy),
- IRB.CreatePointerCast(CalleeCache, IntptrTy)});
+ IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
}
}
@@ -735,9 +669,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreatePointerCast(GuardArray, IntptrTy),
ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- if (Options.TraceBB) {
- IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
- } else if (UseCalls) {
+ if (UseCalls) {
IRB.CreateCall(SanCovWithCheckFunction, GuardP);
} else {
LoadInst *Load = IRB.CreateLoad(GuardP);
@@ -755,19 +687,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
}
}
-
- if (Options.Use8bitCounters) {
- IRB.SetInsertPoint(&*IP);
- Value *P = IRB.CreateAdd(
- IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
- ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
- P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
- LoadInst *LI = IRB.CreateLoad(P);
- Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
- StoreInst *SI = IRB.CreateStore(Inc, P);
- SetNoSanitizeMetadata(LI);
- SetNoSanitizeMetadata(SI);
- }
}
StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 6adfe130d148..b7514a6d5793 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -45,6 +45,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -1010,6 +1011,7 @@ public:
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
} // namespace
@@ -1026,6 +1028,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index cf63cb660db8..20b37c4b70e6 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -197,8 +197,7 @@ public:
continue;
// Only progagate the value if they are of the same type.
- if (Store->getPointerOperand()->getType() !=
- Load->getPointerOperand()->getType())
+ if (Store->getPointerOperandType() != Load->getPointerOperandType())
continue;
Candidates.emplace_front(Load, Store);
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 86058fe0b1aa..fd15a9014def 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -557,7 +557,7 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
Instruction *UUser = dyn_cast<Instruction>(UU);
// Skip SExt if we are extending an nsw value
// TODO: Allow ZExt too
- if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
+ if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
isa<SExtInst>(UUser))
UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
if (!isCompareUsedByBranch(UUser))
@@ -852,7 +852,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
for (auto &KV : Roots) {
if (KV.first == 0)
continue;
- if (KV.second->getNumUses() != NumBaseUses) {
+ if (!KV.second->hasNUses(NumBaseUses)) {
DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
<< "#Base=" << NumBaseUses << ", #Root=" <<
KV.second->getNumUses() << "\n");
@@ -867,7 +867,7 @@ void LoopReroll::DAGRootTracker::
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
// Does the user look like it could be part of a root set?
// All its users must be simple arithmetic ops.
- if (I->getNumUses() > IL_MaxRerollIterations)
+ if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
return;
if (I != IV && findRootsBase(I, SubsumedInsts))
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 3d8ce888867e..a014ddd9ba0a 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -138,7 +138,8 @@ PHIExpression::~PHIExpression() = default;
// It also wants to hand us SCC's that are unrelated to the phi node we ask
// about, and have us process them there or risk redoing work.
// Graph traits over a filter iterator also doesn't work that well here.
-// This SCC finder is specialized to walk use-def chains, and only follows instructions,
+// This SCC finder is specialized to walk use-def chains, and only follows
+// instructions,
// not generic values (arguments, etc).
struct TarjanSCC {
@@ -170,8 +171,10 @@ private:
Root[I] = std::min(Root.lookup(I), Root.lookup(Op));
}
}
- // See if we really were the root of a component, by seeing if we still have our DFSNumber.
- // If we do, we are the root of the component, and we have completed a component. If we do not,
+ // See if we really were the root of a component, by seeing if we still have
+ // our DFSNumber.
+ // If we do, we are the root of the component, and we have completed a
+ // component. If we do not,
// we are not the root of a component, and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
@@ -2254,12 +2257,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
MemoryAccessToClass[MSSA->getLiveOnEntryDef()] =
createMemoryClass(MSSA->getLiveOnEntryDef());
- for (auto &B : F) {
+ for (auto DTN : nodes(DT)) {
+ BasicBlock *BB = DTN->getBlock();
// All MemoryAccesses are equivalent to live on entry to start. They must
// be initialized to something so that initial changes are noticed. For
// the maximal answer, we initialize them all to be the same as
// liveOnEntry.
- auto *MemoryBlockDefs = MSSA->getBlockDefs(&B);
+ auto *MemoryBlockDefs = MSSA->getBlockDefs(BB);
if (MemoryBlockDefs)
for (const auto &Def : *MemoryBlockDefs) {
MemoryAccessToClass[&Def] = TOPClass;
@@ -2274,7 +2278,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
if (MD && isa<StoreInst>(MD->getMemoryInst()))
TOPClass->incStoreCount();
}
- for (auto &I : B) {
+ for (auto &I : *BB) {
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2518,14 +2522,11 @@ void NewGVN::verifyMemoryCongruency() const {
auto ReachableAccessPred =
[&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) {
bool Result = ReachableBlocks.count(Pair.first->getBlock());
- if (!Result)
+ if (!Result || MSSA->isLiveOnEntryDef(Pair.first) ||
+ MemoryToDFSNum(Pair.first) == 0)
return false;
- if (MSSA->isLiveOnEntryDef(Pair.first))
- return true;
if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
return !isInstructionTriviallyDead(MemDef->getMemoryInst());
- if (MemoryToDFSNum(Pair.first) == 0)
- return false;
return true;
};
@@ -2719,25 +2720,13 @@ bool NewGVN::runGVN() {
}
// Now a standard depth first ordering of the domtree is equivalent to RPO.
- auto DFI = df_begin(DT->getRootNode());
- for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) {
- BasicBlock *B = DFI->getBlock();
+ for (auto DTN : depth_first(DT->getRootNode())) {
+ BasicBlock *B = DTN->getBlock();
const auto &BlockRange = assignDFSNumbers(B, ICount);
BlockInstRange.insert({B, BlockRange});
ICount += BlockRange.second - BlockRange.first;
}
- // Handle forward unreachable blocks and figure out which blocks
- // have single preds.
- for (auto &B : F) {
- // Assign numbers to unreachable blocks.
- if (!DFI.nodeVisited(DT->getNode(&B))) {
- const auto &BlockRange = assignDFSNumbers(&B, ICount);
- BlockInstRange.insert({&B, BlockRange});
- ICount += BlockRange.second - BlockRange.first;
- }
- }
-
TouchedInstructions.resize(ICount);
// Ensure we don't end up resizing the expressionToClass map, as
// that can be quite expensive. At most, we have one expression per
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 49ce0262c97b..659353e912fe 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (User *U : Condition->users())
- if (Instruction *I = dyn_cast<Instruction>(U))
+ for (User *U : Condition->users()) {
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
return I;
+ }
+ }
+
+ // Avoid creating a new instruction in the common case of a compare.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
+ if (Cmp->hasOneUse()) {
+ Cmp->setPredicate(Cmp->getInversePredicate());
+ return Cmp;
+ }
+ }
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 60ae3745c835..9f4d9c7e3981 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
default:
return false;
case ICmpInst::ICMP_SLT:
- // X < 0 is equivalent to (X & SignBit) != 0.
+ // X < 0 is equivalent to (X & SignMask) != 0.
if (!C->isZero())
return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SGT:
- // X > -1 is equivalent to (X & SignBit) == 0.
+ // X > -1 is equivalent to (X & SignMask) == 0.
if (!C->isAllOnesValue())
return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
Pred = ICmpInst::ICMP_EQ;
break;
case ICmpInst::ICMP_ULT:
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 644d93b727b3..82552684b832 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -112,24 +112,6 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
return buildExtractionBlockSet(BBs.begin(), BBs.end());
}
-/// \brief Helper to call buildExtractionBlockSet with a RegionNode.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(const RegionNode &RN) {
- if (!RN.isSubRegion())
- // Just a single BasicBlock.
- return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>());
-
- const Region &R = *RN.getNodeAs<Region>();
-
- return buildExtractionBlockSet(R.block_begin(), R.block_end());
-}
-
-CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs,
- BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
- : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
-
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
@@ -143,12 +125,6 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
NumExitBlocks(~0U) {}
-CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN,
- bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
- : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {}
-
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 49b4bd92faf4..089f2b5f3b18 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -85,6 +85,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
UsesToRewrite.clear();
Instruction *I = Worklist.pop_back_val();
+ assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
BasicBlock *InstBB = I->getParent();
Loop *L = LI.getLoopFor(InstBB);
assert(L && "Instruction belongs to a BB that's not part of a loop");
@@ -96,13 +97,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (ExitBlocks.empty())
continue;
- // Tokens cannot be used in PHI nodes, so we skip over them.
- // We can run into tokens which are live out of a loop with catchswitch
- // instructions in Windows EH if the catchswitch has one catchpad which
- // is inside the loop and another which is not.
- if (I->getType()->isTokenTy())
- continue;
-
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
@@ -214,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Post process PHI instructions that were inserted into another disjoint
// loop and update their exits properly.
- for (auto *PostProcessPN : PostProcessPHIs) {
- if (PostProcessPN->use_empty())
- continue;
-
- // Reprocess each PHI instruction.
- Worklist.push_back(PostProcessPN);
- }
+ for (auto *PostProcessPN : PostProcessPHIs)
+ if (!PostProcessPN->use_empty())
+ Worklist.push_back(PostProcessPN);
// Keep track of PHI nodes that we want to remove because they did not have
// any uses rewritten.
@@ -241,7 +231,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
static void computeBlocksDominatingExits(
Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
- SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) {
+ SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
SmallVector<BasicBlock *, 8> BBWorklist;
// We start from the exit blocks, as every block trivially dominates itself
@@ -279,7 +269,7 @@ static void computeBlocksDominatingExits(
if (!L.contains(IDomBB))
continue;
- if (BlocksDominatingExits.insert(IDomBB).second)
+ if (BlocksDominatingExits.insert(IDomBB))
BBWorklist.push_back(IDomBB);
}
}
@@ -293,7 +283,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
if (ExitBlocks.empty())
return false;
- SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits;
+ SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
// We want to avoid use-scanning leveraging dominance informations.
// If a block doesn't dominate any of the loop exits, the none of the values
@@ -315,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
!isa<PHINode>(I.user_back())))
continue;
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (I.getType()->isTokenTy())
+ continue;
+
Worklist.push_back(&I);
}
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 18b29226c2ef..8c5442762643 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1227,13 +1227,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- SmallVector<uint64_t, 1> NewDIExpr;
- auto *DIExpr = DDI->getExpression();
- NewDIExpr.push_back(dwarf::DW_OP_deref);
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
- DIB.createExpression(NewDIExpr),
- DDI->getDebugLoc(), CI);
+ DDI->getExpression(), DDI->getDebugLoc(),
+ CI);
}
}
DDI->eraseFromParent();
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 73c14f5606b7..5c21490793e7 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. only may become an invariant if the loop is
+// fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
// Check whether we are capable of peeling this loop.
static bool canPeel(Loop *L) {
// Make sure the loop is in simplified form
@@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
return true;
}
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memorized in the map. The
+// function (shortcut is I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// If %y is a loop invariant, then I(%x) = 1.
+// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+// Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+// looks like:
+// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
+// %y = phi(0, 5),
+// %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+ PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+ SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ assert(Phi->getParent() == L->getHeader() &&
+ "Non-loop Phi should not be checked for turning into invariant.");
+ assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+ // If we already know the answer, take it from the map.
+ auto I = IterationsToInvariance.find(Phi);
+ if (I != IterationsToInvariance.end())
+ return I->second;
+
+ // Otherwise we need to analyze the input from the back edge.
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+ // Place infinity to map to avoid infinite recursion for cycled Phis. Such
+ // cycles can never stop on an invariant.
+ IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+ unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+ if (L->isLoopInvariant(Input))
+ ToInvariance = 1u;
+ else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+ // Only consider Phis in header block.
+ if (IncPhi->getParent() != L->getHeader())
+ return InfiniteIterationsToInvariance;
+ // If the input becomes an invariant after X iterations, then our Phi
+ // becomes an invariant after X + 1 iterations.
+ unsigned InputToInvariance = calculateIterationsToInvariance(
+ IncPhi, L, BackEdge, IterationsToInvariance);
+ if (InputToInvariance != InfiniteIterationsToInvariance)
+ ToInvariance = InputToInvariance + 1u;
+ }
+
+ // If we found that this Phi lies in an invariant chain, update the map.
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ IterationsToInvariance[Phi] = ToInvariance;
+ return ToInvariance;
+}
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
unsigned &TripCount) {
+ assert(LoopSize > 0 && "Zero loop size is not allowed!");
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -78,30 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
- // Try to find a Phi node that has the same loop invariant as an input from
- // its only back edge. If there is such Phi, peeling 1 iteration from the
- // loop is profitable, because starting from 2nd iteration we will have an
- // invariant instead of this Phi.
- if (LoopSize <= UP.Threshold) {
+ // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+ // iterations of the loop. For this we compute the number for iterations after
+ // which every Phi is guaranteed to become an invariant, and try to peel the
+ // maximum number of iterations among these values, thus turning all those
+ // Phis into invariants.
+ // First, check that we can peel at least one iteration.
+ if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ // Store the pre-calculated values here.
+ SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+ // Now go through all Phis to calculate their the number of iterations they
+ // need to become invariants.
+ unsigned DesiredPeelCount = 0;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
- BasicBlock *Header = L->getHeader();
- // Iterate over Phis to find one with invariant input on back edge.
- bool FoundCandidate = false;
- PHINode *Phi;
- for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
- Phi = cast<PHINode>(&*BI);
- Value *Input = Phi->getIncomingValueForBlock(BackEdge);
- if (L->isLoopInvariant(Input)) {
- FoundCandidate = true;
- break;
- }
+ for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+ PHINode *Phi = cast<PHINode>(&*BI);
+ unsigned ToInvariance = calculateIterationsToInvariance(
+ Phi, L, BackEdge, IterationsToInvariance);
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
}
- if (FoundCandidate) {
- DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
- << " because starting from 2nd iteration it is always"
- << " an invariant\n");
- UP.PeelCount = 1;
+ if (DesiredPeelCount > 0) {
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
return;
}
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 127a44df5344..2f575b9d5027 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3086,7 +3086,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
(QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
return false;
- if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2)
+ if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2))
return false;
// OK, this is a sequence of two diamonds or triangles.
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 4aeea02b1b1b..83bd29dbca65 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -24,6 +24,11 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
return false;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
+ DL.isNonIntegralPointerType(LoadTy))
+ return false;
+
return true;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 595b2ec88943..7eb8fabe0b2f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -422,7 +422,8 @@ protected:
// When we if-convert we need to create edge masks. We have to cache values
// so that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>
- EdgeMaskCache;
+ EdgeMaskCacheTy;
+ typedef DenseMap<BasicBlock *, VectorParts> BlockMaskCacheTy;
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
@@ -785,7 +786,8 @@ protected:
/// Store instructions that should be predicated, as a pair
/// <StoreInst, Predicate>
SmallVector<std::pair<Instruction *, Value *>, 4> PredicatedInstructions;
- EdgeMaskCache MaskCache;
+ EdgeMaskCacheTy EdgeMaskCache;
+ BlockMaskCacheTy BlockMaskCache;
/// Trip count of the original loop.
Value *TripCount;
/// Trip count of the widened loop (TripCount - TripCount % (VF*UF))
@@ -4560,8 +4562,8 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
// Look for cached value.
std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
- EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
- if (ECEntryIt != MaskCache.end())
+ EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
+ if (ECEntryIt != EdgeMaskCache.end())
return ECEntryIt->second;
VectorParts SrcMask = createBlockInMask(Src);
@@ -4580,11 +4582,11 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
for (unsigned part = 0; part < UF; ++part)
EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
- MaskCache[Edge] = EdgeMask;
+ EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
}
- MaskCache[Edge] = SrcMask;
+ EdgeMaskCache[Edge] = SrcMask;
return SrcMask;
}
@@ -4592,10 +4594,17 @@ InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+ // Look for cached value.
+ BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB);
+ if (BCEntryIt != BlockMaskCache.end())
+ return BCEntryIt->second;
+
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- return getVectorValue(C);
+ const VectorParts &BlockMask = getVectorValue(C);
+ BlockMaskCache[BB] = BlockMask;
+ return BlockMask;
}
// This is the block mask. We OR all incoming edges, and with zero.
@@ -4609,6 +4618,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
}
+ BlockMaskCache[BB] = BlockMask;
return BlockMask;
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index da3ac06ab464..554944404708 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4146,8 +4146,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
// reorder a larger list. In fact, we do exactly this when vectorizing
- // reductions. However, at this point, we only expect to get here from
- // tryToVectorizePair().
+ // reductions. However, at this point, we only expect to get here when
+ // there are exactly two operations.
assert(Ops.size() == 2);
assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = {Ops[1], Ops[0]};
@@ -4904,7 +4904,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
+ // The order in which the phi nodes appear in the program does not matter.
+ // So allow tryToVectorizeList to reorder them if it is beneficial. This
+ // is done when there are exactly two elements since tryToVectorizeList
+ // asserts that there are only two values when AllowReorder is true.
+ bool AllowReorder = NumElts == 2;
+ if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
+ None, AllowReorder)) {
// Success start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;