diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:19:10 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:19:10 +0000 |
commit | d99dafe2e4a385dd2a6c76da6d8258deb100657b (patch) | |
tree | ba60bf957558bd114f25dbff3d4996b5d7a61c82 /lib/Transforms | |
parent | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (diff) |
Notes
Diffstat (limited to 'lib/Transforms')
32 files changed, 470 insertions, 457 deletions
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 375b74c494d9..8e26849ea9e3 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -167,15 +167,12 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Drop any attributes that were on the vararg arguments. AttributeList PAL = CS.getAttributes(); - if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { - SmallVector<AttributeList, 8> AttributesVec; - for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) - AttributesVec.push_back(PAL.getSlotAttributes(i)); - if (PAL.hasAttributes(AttributeList::FunctionIndex)) - AttributesVec.push_back(AttributeList::get(Fn.getContext(), - AttributeList::FunctionIndex, - PAL.getFnAttributes())); - PAL = AttributeList::get(Fn.getContext(), AttributesVec); + if (!PAL.isEmpty()) { + SmallVector<AttributeSet, 8> ArgAttrs; + for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) + ArgAttrs.push_back(PAL.getParamAttributes(ArgNo)); + PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(), + PAL.getRetAttributes(), ArgAttrs); } SmallVector<OperandBundleDef, 1> OpBundles; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 4d13b3f40688..9648883b7f27 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -222,15 +222,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { MadeChange = true; // Clear out any existing attributes. - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - F->removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B)); + F->removeFnAttr(Attribute::ReadOnly); + F->removeFnAttr(Attribute::ReadNone); // Add in the new attribute. - F->addAttribute(AttributeList::FunctionIndex, - ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); + F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); if (ReadsMemory) ++NumReadOnly; @@ -495,9 +491,6 @@ determinePointerReadAttrs(Argument *A, static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - AttrBuilder B; - B.addAttribute(Attribute::Returned); - // Check each function in turn, determining if an argument is always returned. for (Function *F : SCCNodes) { // We can infer and propagate function attributes only when we know that the @@ -535,7 +528,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { if (Value *RetArg = FindRetArg()) { auto *A = cast<Argument>(RetArg); - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::Returned); ++NumReturned; Changed = true; } @@ -593,9 +586,6 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { ArgumentGraph AG; - AttrBuilder B; - B.addAttribute(Attribute::NoCapture); - // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { @@ -614,7 +604,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -633,8 +623,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr( - AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } else { @@ -660,9 +649,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { - AttrBuilder B; - B.addAttribute(R); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(R); Changed = true; R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; } @@ -687,7 +674,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { Argument *A = ArgumentSCC[0]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -729,7 +716,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -760,15 +747,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { } if (ReadAttr != Attribute::None) { - AttrBuilder B, R; - B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes - A->removeAttr( - AttributeList::get(A->getContext(), A->getArgNo() + 1, R)); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(ReadAttr); ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; Changed = true; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ade4f21ceb52..ae9d4ce11e0d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1979,16 +1979,11 @@ static void ChangeCalleesToFastCall(Function *F) { } } -static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) { - for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Index = Attrs.getSlotIndex(i); - if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) - continue; - - // There can be only one. - return Attrs.removeAttribute(C, Index, Attribute::Nest); - } - +static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) { + // There can be at most one attribute set with a nest attribute. + unsigned NestIndex; + if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex)) + return Attrs.removeAttribute(C, NestIndex, Attribute::Nest); return Attrs; } diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 3371de6e3d14..e755e2bd8f26 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/SampleProfReader.h" @@ -208,6 +209,12 @@ protected: /// the same number of times. EquivalenceClassMap EquivalenceClass; + /// Map from function name to Function *. Used to find the function from + /// the function name. If the function name contains suffix, additional + /// entry is added to map from the stripped name to the function if there + /// is one-to-one mapping. + StringMap<Function *> SymbolMap; + /// \brief Dominance, post-dominance and loop information. std::unique_ptr<DominatorTree> DT; std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; @@ -670,7 +677,7 @@ bool SampleProfileLoader::inlineHotFunctions( for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && - (FS = findCalleeFunctionSamples(I))) { + !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { Candidates.push_back(&I); if (callsiteIsHot(Samples, FS)) Hot = true; @@ -689,7 +696,10 @@ bool SampleProfileLoader::inlineHotFunctions( for (const auto *FS : findIndirectCallFunctionSamples(*I)) { auto CalleeFunctionName = FS->getName(); const char *Reason = "Callee function not available"; - CalledFunction = F.getParent()->getFunction(CalleeFunctionName); + auto R = SymbolMap.find(CalleeFunctionName); + if (R == SymbolMap.end()) + continue; + CalledFunction = R->getValue(); if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { // The indirect target was promoted and inlined in the profile, as a // result, we do not have profile info for the branch probability. @@ -1181,8 +1191,11 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) continue; + DebugLoc BranchLoc = TI->getDebugLoc(); DEBUG(dbgs() << "\nGetting weights for branch at line " - << TI->getDebugLoc().getLine() << ".\n"); + << ((BranchLoc) ? Twine(BranchLoc.getLine()) + : Twine("<UNKNOWN LOCATION>")) + << ".\n"); SmallVector<uint32_t, 4> Weights; uint32_t MaxWeight = 0; DebugLoc MaxDestLoc; @@ -1219,7 +1232,6 @@ void SampleProfileLoader::propagateWeights(Function &F) { DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); - DebugLoc BranchLoc = TI->getDebugLoc(); emitOptimizationRemark( Ctx, DEBUG_TYPE, F, MaxDestLoc, Twine("most popular destination for conditional branches at ") + @@ -1414,6 +1426,26 @@ bool SampleProfileLoader::runOnModule(Module &M) { for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); + // Populate the symbol map. + for (const auto &N_F : M.getValueSymbolTable()) { + std::string OrigName = N_F.getKey(); + Function *F = dyn_cast<Function>(N_F.getValue()); + if (F == nullptr) + continue; + SymbolMap[OrigName] = F; + auto pos = OrigName.find('.'); + if (pos != std::string::npos) { + std::string NewName = OrigName.substr(0, pos); + auto r = SymbolMap.insert(std::make_pair(NewName, F)); + // Failiing to insert means there is already an entry in SymbolMap, + // thus there are multiple functions that are mapped to the same + // stripped name. In this case of name conflicting, set the value + // to nullptr to avoid confusion. + if (!r.second) + r.first->second = nullptr; + } + } + bool retval = false; for (auto &F : M) if (!F.isDeclaration()) { diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 65deb82cd2a5..9801a0a61416 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -363,6 +363,7 @@ void splitAndWriteThinLTOBitcode( W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); W.writeModule(MergedM.get()); + W.writeStrtab(); OS << Buffer; // If a minimized bitcode module was requested for the thin link, @@ -375,6 +376,7 @@ void splitAndWriteThinLTOBitcode( W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); W2.writeModule(MergedM.get()); + W2.writeStrtab(); *ThinLinkOS << Buffer; } } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 174ec8036274..e30a4bafb9b0 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1044,14 +1044,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const APInt *RHSC; if (match(RHS, m_APInt(RHSC))) { - if (RHSC->isSignBit()) { + if (RHSC->isSignMask()) { // If wrapping is not allowed, then the addition must set the sign bit: - // X + (signbit) --> X | signbit + // X + (signmask) --> X | signmask if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) return BinaryOperator::CreateOr(LHS, RHS); // If wrapping is allowed, then the addition flips the sign bit of LHS: - // X + (signbit) --> X ^ signbit + // X + (signmask) --> X ^ signmask return BinaryOperator::CreateXor(LHS, RHS); } @@ -1120,9 +1120,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } - // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, - // transform them into (X + (signbit ^ C)) - if (XorRHS->getValue().isSignBit()) + // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, + // transform them into (X + (signmask ^ C)) + if (XorRHS->getValue().isSignMask()) return BinaryOperator::CreateAdd(XorLHS, ConstantExpr::getXor(XorRHS, CI)); } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b2a41c699202..3a98e8937bda 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2078,7 +2078,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast<Instruction>(Op0)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } // Y|(X^C) -> (X|Y)^C iff Y&C == 0 @@ -2087,7 +2087,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast<Instruction>(Op1)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } } @@ -2480,8 +2480,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); - } else if (RHSC->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) + } else if (RHSC->getValue().isSignMask()) { + // (X + C) ^ signmask -> (X + C + signmask) Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 69484f47223f..e7aa1a457371 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -839,7 +839,8 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, // Length bits. if (CI0) { APInt Elt = CI0->getValue(); - Elt = Elt.lshr(Index).zextOrTrunc(Length); + Elt.lshrInPlace(Index); + Elt = Elt.zextOrTrunc(Length); return LowConstantHighUndef(Elt.getZExtValue()); } @@ -1036,7 +1037,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, // The PD variants uses bit 1 to select per-lane element index, so // shift down to convert to generic shuffle mask index. if (IsPD) - Index = Index.lshr(1); + Index.lshrInPlace(1); // The _256 variants are a bit trickier since the mask bits always index // into the corresponding 128 half. In order to convert to a generic @@ -4067,21 +4068,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && - !CallerPAL.isEmpty()) + !CallerPAL.isEmpty()) { // In this case we have more arguments than the new function type, but we // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. - for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - unsigned Index = CallerPAL.getSlotIndex(i - 1); - if (Index <= FT->getNumParams()) - break; - - // Check if it has an attribute that's incompatible with varargs. - AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1); - if (PAttrs.hasAttribute(Index, Attribute::StructRet)) - return false; - } - + unsigned SRetIdx; + if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) && + SRetIdx > FT->getNumParams()) + return false; + } // Okay, we decided that this is a safe thing to do: go ahead and start // inserting cast instructions as necessary. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 25683132c786..9127ddca5915 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1591,7 +1591,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP into CI would undo canonicalizing addrspacecast with different // pointer types, causing infinite loops. (!isa<AddrSpaceCastInst>(CI) || - GEP->getType() == GEP->getPointerOperand()->getType())) { + GEP->getType() == GEP->getPointerOperandType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bbafa9e9f468..003029ae39d5 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -140,7 +140,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; - return RHS.isSignBit(); + return RHS.isSignMask(); default: return false; } @@ -1532,14 +1532,14 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, } if (Xor->hasOneUse()) { - // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit)) - if (!Cmp.isEquality() && XorC->isSignBit()) { + // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) + if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); } - // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit)) + // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); @@ -2402,9 +2402,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { - if (Lower.isSignBit()) + if (Lower.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); - if (Upper.isSignBit()) + if (Upper.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); } else { if (Lower.isMinValue()) @@ -2604,7 +2604,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignBit()) { + if (BOC->isSignMask()) { Constant *Zero = Constant::getNullValue(BOp0->getType()); auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(NewPred, BOp0, Zero); @@ -3032,9 +3032,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(I.getPredicate(), BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { - if (CI->getValue().isSignBit()) { + if (CI->getValue().isSignMask()) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -3797,7 +3797,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) - return APInt::getSignBit(BitWidth); + return APInt::getSignMask(BitWidth); ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6288e054f1bc..675553017838 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -931,6 +931,18 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr, return nullptr; } +static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { + const Value *GEPI0 = GEPI->getOperand(0); + if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) + return true; + } + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) + return true; + return false; +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); @@ -979,27 +991,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (!LI.isUnordered()) return nullptr; // load(gep null, ...) -> unreachable - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { - const Value *GEPI0 = GEPI->getOperand(0); - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - } - // load null/undef -> unreachable - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(Op) || - (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. + // TODO: Consider a target hook for valid address spaces for this xforms. + if (canSimplifyNullLoadOrGEP(LI, Op)) { + // Insert a new store to null instruction before the load to indicate + // that this code is not reachable. We do this instead of inserting + // an unreachable instruction directly because we cannot modify the + // CFG. new StoreInst(UndefValue::get(LI.getType()), Constant::getNullValue(Op->getType()), &LI); return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f1ac82057e6c..ce66581a491a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -944,22 +944,21 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) { - if (One->isOne() && !I.getType()->isIntegerTy(1)) { - bool isSigned = I.getOpcode() == Instruction::SDiv; - if (isSigned) { - // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the - // result is one, if Op1 is -1 then the result is minus one, otherwise - // it's zero. - Value *Inc = Builder->CreateAdd(Op1, One); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); - return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); - } else { - // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the - // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); - } + if (match(Op0, m_One())) { + assert(!I.getType()->getScalarType()->isIntegerTy(1) && + "i1 divide not removed?"); + if (I.getOpcode() == Instruction::SDiv) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. + Value *Inc = Builder->CreateAdd(Op1, Op0); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1238,25 +1237,23 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask, 0, &I)) { - if (MaskedValueIsZero(Op1, Mask, 0, &I)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op0, Mask, 0, &I)) { + if (MaskedValueIsZero(Op1, Mask, 0, &I)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; + } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; } } @@ -1546,13 +1543,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask, 0, &I) && - MaskedValueIsZero(Op0, Mask, 0, &I)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op1, Mask, 0, &I) && + MaskedValueIsZero(Op0, Mask, 0, &I)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } // If it's a constant vector, flip any negative values positive. diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 693b6c95c169..5d6d899da4b5 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -618,7 +618,7 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, { unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - APInt MinSignedValue = APInt::getSignBit(BitWidth); + APInt MinSignedValue = APInt::getSignedMinValue(BitWidth); Value *X; const APInt *Y, *C; bool TrueWhenUnset; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9aa679c60e47..f77d713b9b07 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -370,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, MaskV <<= Op1C->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1C->getZExtValue()); + MaskV.lshrInPlace(Op1C->getZExtValue()); } // shift1 & 0x00FF @@ -760,7 +760,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I)) + if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 4e6f02058d83..2ba052b7e02d 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -38,7 +38,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // If there are no bits set that aren't demanded, nothing to do. Demanded = Demanded.zextOrTrunc(C->getBitWidth()); - if ((~Demanded & *C) == 0) + if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. @@ -117,27 +117,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownOne.getBitWidth() == BitWidth && "Value *V, DemandedMask, KnownZero and KnownOne " "must have same BitWidth"); - const APInt *C; - if (match(V, m_APInt(C))) { - // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C & DemandedMask; - KnownZero = ~KnownOne & DemandedMask; - return nullptr; - } - if (isa<ConstantPointerNull>(V)) { - // We know all of the bits for a constant! - KnownOne.clearAllBits(); - KnownZero = DemandedMask; + + if (isa<Constant>(V)) { + computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa<UndefValue>(V)) - return nullptr; + if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); - } if (Depth == 6) // Limit search depth. return nullptr; @@ -187,16 +176,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -224,25 +211,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -271,20 +247,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -295,14 +271,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstWith(And, *I); - } + if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && + RHSKnownOne.isSubsetOf(LHSKnownOne)) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } // If the RHS is a constant, see if we can simplify it. @@ -529,9 +503,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownZero.setLowBits(ShiftAmt); } break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + case Instruction::LShr: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. @@ -546,13 +520,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownOne.lshrInPlace(ShiftAmt); if (ShiftAmt) KnownZero.setHighBits(ShiftAmt); // high bits known zero. } break; - case Instruction::AShr: + } + case Instruction::AShr: { // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless @@ -566,15 +541,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) + if (DemandedMask.isSignMask()) return I->getOperand(0); - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as + // If any of the high bits are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.setSignBit(); @@ -587,31 +563,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, Depth + 1)) return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownOne.lshrInPlace(ShiftAmt); // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); + APInt SignMask(APInt::getSignMask(BitWidth)); // Adjust to where it is now in the mask. - SignBit = SignBit.lshr(ShiftAmt); + SignMask.lshrInPlace(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), - SA, I->getName()); - NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); - return InsertNewInstWith(NewVal, *I); - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), + I->getOperand(1)); + LShr->setIsExact(cast<BinaryOperator>(I)->isExact()); + return InsertNewInstWith(LShr, *I); + } else if ((KnownOne & SignMask) != 0) { // New bits are known one. KnownOne |= HighBits; } } break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce @@ -624,7 +601,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(0); APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, Depth + 1)) return I; @@ -635,26 +612,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits)) + if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) KnownZero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0)) + if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + if (DemandedMask.isSignBitSet()) { computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isNegative()) + if (LHSKnownZero.isSignBitSet()) KnownZero.setSignBit(); } break; @@ -744,7 +721,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(VTy, KnownOne); return nullptr; } @@ -783,17 +760,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -817,26 +792,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -861,14 +825,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. @@ -883,7 +847,7 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If this user is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(ITy, KnownOne); break; @@ -1641,7 +1605,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts.setHighBits(VWidth / 2); break; case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { + case Intrinsic::amdgcn_buffer_load_format: + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + case Intrinsic::amdgcn_image_getlod: { if (VWidth == 1 || !DemandedElts.isMask()) return nullptr; @@ -1656,8 +1665,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts); - Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(), - NewTy); + auto IID = II->getIntrinsicID(); + + bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load || + IID == Intrinsic::amdgcn_buffer_load_format; + + Function *NewIntrin = IsBuffer ? + Intrinsic::getDeclaration(M, IID, NewTy) : + // Samplers have 3 mangled types. + Intrinsic::getDeclaration(M, IID, + { NewTy, II->getArgOperand(0)->getType(), + II->getArgOperand(1)->getType()}); SmallVector<Value *, 5> Args; for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) @@ -1669,6 +1687,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); + + if (!IsBuffer) { + ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3)); + if (DMask) { + unsigned DMaskVal = DMask->getZExtValue() & 0xf; + + unsigned PopCnt = 0; + unsigned NewDMask = 0; + for (unsigned I = 0; I < 4; ++I) { + const unsigned Bit = 1 << I; + if (!!(DMaskVal & Bit)) { + if (++PopCnt > NewNumElts) + break; + + NewDMask |= Bit; + } + } + + NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask)); + } + } + + if (NewNumElts == 1) { return Builder->CreateInsertElement(UndefValue::get(V->getType()), NewCall, static_cast<uint64_t>(0)); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 88ef17bbc8fa..81f2d9fa179f 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -148,9 +148,9 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { bool Overflow = false; if (Opcode == Instruction::Add) - BVal->sadd_ov(*CVal, Overflow); + (void)BVal->sadd_ov(*CVal, Overflow); else - BVal->ssub_ov(*CVal, Overflow); + (void)BVal->ssub_ov(*CVal, Overflow); return !Overflow; } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 94cfc69ed555..036dd8d39a08 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2586,7 +2586,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index fa0c7cc5a4c5..8bdd917a0596 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -59,13 +59,8 @@ using namespace llvm; static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const SanCovName = "__sanitizer_cov"; static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; -static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTraceEnterName = - "__sanitizer_cov_trace_func_enter"; -static const char *const SanCovTraceBBName = - "__sanitizer_cov_trace_basic_block"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1"; static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2"; @@ -86,8 +81,7 @@ static const char *const SanCovTracePCGuardInitName = static cl::opt<int> ClCoverageLevel( "sanitizer-coverage-level", cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges, " - "4: above plus indirect calls"), + "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); static cl::opt<unsigned> ClCoverageBlockThreshold( @@ -96,12 +90,6 @@ static cl::opt<unsigned> ClCoverageBlockThreshold( " more than this number of blocks."), cl::Hidden, cl::init(0)); -static cl::opt<bool> - ClExperimentalTracing("sanitizer-coverage-experimental-tracing", - cl::desc("Experimental basic-block tracing: insert " - "callbacks at every basic block"), - cl::Hidden, cl::init(false)); - static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", cl::desc("Experimental pc tracing"), cl::Hidden, cl::init(false)); @@ -128,16 +116,6 @@ static cl::opt<bool> cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, cl::init(true)); -// Experimental 8-bit counters used as an additional search heuristic during -// coverage-guided fuzzing. -// The counters are not thread-friendly: -// - contention on these counters may cause significant slowdown; -// - the counter updates are racy and the results may be inaccurate. -// They are also inaccurate due to 8-bit integer overflow. -static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters", - cl::desc("Experimental 8-bit counters"), - cl::Hidden, cl::init(false)); - namespace { SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { @@ -168,11 +146,9 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); Options.IndirectCalls |= CLOpts.IndirectCalls; - Options.TraceBB |= ClExperimentalTracing; Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.Use8bitCounters |= ClUse8bitCounters; Options.TracePC |= ClExperimentalTracePC; Options.TracePCGuard |= ClTracePCGuard; return Options; @@ -212,16 +188,15 @@ private: bool UseCalls); unsigned NumberOfInstrumentedBlocks() { return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + - SanCovTraceEnter->getNumUses(); + SanCovWithCheckFunction->getNumUses(); } StringRef getSanCovTracePCGuardSection() const; StringRef getSanCovTracePCGuardSectionStart() const; StringRef getSanCovTracePCGuardSectionEnd() const; Function *SanCovFunction; Function *SanCovWithCheckFunction; - Function *SanCovIndirCallFunction, *SanCovTracePCIndir; - Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard; + Function *SanCovTracePCIndir; + Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; Function *SanCovTraceDivFunction[2]; Function *SanCovTraceGepFunction; @@ -235,7 +210,6 @@ private: GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - GlobalVariable *EightBitCounterArray; bool HasSancovGuardsSection; SanitizerCoverageOptions Options; @@ -267,9 +241,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); - SanCovIndirCallFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy)); SanCovTraceCmpFunction[0] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty())); @@ -305,24 +276,15 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovTracePCName, VoidTy)); SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTracePCGuardName, VoidTy, Int32PtrTy)); - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. Type *Int32Ty = IRB.getInt32Ty(); - Type *Int8Ty = IRB.getInt8Ty(); if (!Options.TracePCGuard) GuardArray = new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, "__sancov_gen_cov_tmp"); - if (Options.Use8bitCounters) - EightBitCounterArray = - new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); for (auto &F : M) runOnFunction(F); @@ -344,20 +306,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { GuardArray->eraseFromParent(); } - GlobalVariable *RealEightBitCounterArray; - if (Options.Use8bitCounters) { - // Make sure the array is 16-aligned. - static const int CounterAlignment = 16; - Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment)); - RealEightBitCounterArray = new GlobalVariable( - M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); - RealEightBitCounterArray->setAlignment(CounterAlignment); - EightBitCounterArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); - EightBitCounterArray->eraseFromParent(); - } - // Create variable for module (compilation unit) name Constant *ModNameStrConst = ConstantDataArray::getString(M.getContext(), M.getName(), true); @@ -396,10 +344,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M, SanCovModuleCtorName, SanCovModuleInitName, {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), - Options.Use8bitCounters - ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) - : Constant::getNullValue(Int8PtrTy), + ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy), IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); @@ -566,26 +511,15 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - const int CacheSize = 16; - const int CacheAlignment = 64; // Align for better performance. - Type *Ty = ArrayType::get(IntptrTy, CacheSize); + if (!Options.TracePC && !Options.TracePCGuard) + return; for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); if (isa<InlineAsm>(Callee)) continue; - GlobalVariable *CalleeCache = new GlobalVariable( - *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); - CalleeCache->setAlignment(CacheAlignment); - if (Options.TracePC || Options.TracePCGuard) - IRB.CreateCall(SanCovTracePCIndir, - IRB.CreatePointerCast(Callee, IntptrTy)); - else - IRB.CreateCall(SanCovIndirCallFunction, - {IRB.CreatePointerCast(Callee, IntptrTy), - IRB.CreatePointerCast(CalleeCache, IntptrTy)}); + IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); } } @@ -735,9 +669,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (Options.TraceBB) { - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } else if (UseCalls) { + if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); @@ -755,19 +687,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } } - - if (Options.Use8bitCounters) { - IRB.SetInsertPoint(&*IP); - Value *P = IRB.CreateAdd( - IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), - ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); - P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); - LoadInst *LI = IRB.CreateLoad(P); - Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); - StoreInst *SI = IRB.CreateStore(Inc, P); - SetNoSanitizeMetadata(LI); - SetNoSanitizeMetadata(SI); - } } StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 6adfe130d148..b7514a6d5793 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -45,6 +45,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" @@ -1010,6 +1011,7 @@ public: AU.addRequired<MemorySSAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<MemorySSAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); } }; } // namespace @@ -1026,6 +1028,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); PA.preserve<MemorySSAAnalysis>(); + PA.preserve<GlobalsAA>(); return PA; } diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index cf63cb660db8..20b37c4b70e6 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -197,8 +197,7 @@ public: continue; // Only progagate the value if they are of the same type. - if (Store->getPointerOperand()->getType() != - Load->getPointerOperand()->getType()) + if (Store->getPointerOperandType() != Load->getPointerOperandType()) continue; Candidates.emplace_front(Load, Store); diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 86058fe0b1aa..fd15a9014def 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -557,7 +557,7 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { Instruction *UUser = dyn_cast<Instruction>(UU); // Skip SExt if we are extending an nsw value // TODO: Allow ZExt too - if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 && + if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() && isa<SExtInst>(UUser)) UUser = dyn_cast<Instruction>(*(UUser->user_begin())); if (!isCompareUsedByBranch(UUser)) @@ -852,7 +852,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) { for (auto &KV : Roots) { if (KV.first == 0) continue; - if (KV.second->getNumUses() != NumBaseUses) { + if (!KV.second->hasNUses(NumBaseUses)) { DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: " << "#Base=" << NumBaseUses << ", #Root=" << KV.second->getNumUses() << "\n"); @@ -867,7 +867,7 @@ void LoopReroll::DAGRootTracker:: findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { // Does the user look like it could be part of a root set? // All its users must be simple arithmetic ops. - if (I->getNumUses() > IL_MaxRerollIterations) + if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1)) return; if (I != IV && findRootsBase(I, SubsumedInsts)) diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 3d8ce888867e..a014ddd9ba0a 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -138,7 +138,8 @@ PHIExpression::~PHIExpression() = default; // It also wants to hand us SCC's that are unrelated to the phi node we ask // about, and have us process them there or risk redoing work. // Graph traits over a filter iterator also doesn't work that well here. -// This SCC finder is specialized to walk use-def chains, and only follows instructions, +// This SCC finder is specialized to walk use-def chains, and only follows +// instructions, // not generic values (arguments, etc). struct TarjanSCC { @@ -170,8 +171,10 @@ private: Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); } } - // See if we really were the root of a component, by seeing if we still have our DFSNumber. - // If we do, we are the root of the component, and we have completed a component. If we do not, + // See if we really were the root of a component, by seeing if we still have + // our DFSNumber. + // If we do, we are the root of the component, and we have completed a + // component. If we do not, // we are not the root of a component, and belong on the component stack. if (Root.lookup(I) == OurDFS) { unsigned ComponentID = Components.size(); @@ -2254,12 +2257,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) { MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = createMemoryClass(MSSA->getLiveOnEntryDef()); - for (auto &B : F) { + for (auto DTN : nodes(DT)) { + BasicBlock *BB = DTN->getBlock(); // All MemoryAccesses are equivalent to live on entry to start. They must // be initialized to something so that initial changes are noticed. For // the maximal answer, we initialize them all to be the same as // liveOnEntry. - auto *MemoryBlockDefs = MSSA->getBlockDefs(&B); + auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); if (MemoryBlockDefs) for (const auto &Def : *MemoryBlockDefs) { MemoryAccessToClass[&Def] = TOPClass; @@ -2274,7 +2278,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) { if (MD && isa<StoreInst>(MD->getMemoryInst())) TOPClass->incStoreCount(); } - for (auto &I : B) { + for (auto &I : *BB) { // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) @@ -2518,14 +2522,11 @@ void NewGVN::verifyMemoryCongruency() const { auto ReachableAccessPred = [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) { bool Result = ReachableBlocks.count(Pair.first->getBlock()); - if (!Result) + if (!Result || MSSA->isLiveOnEntryDef(Pair.first) || + MemoryToDFSNum(Pair.first) == 0) return false; - if (MSSA->isLiveOnEntryDef(Pair.first)) - return true; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); - if (MemoryToDFSNum(Pair.first) == 0) - return false; return true; }; @@ -2719,25 +2720,13 @@ bool NewGVN::runGVN() { } // Now a standard depth first ordering of the domtree is equivalent to RPO. - auto DFI = df_begin(DT->getRootNode()); - for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) { - BasicBlock *B = DFI->getBlock(); + for (auto DTN : depth_first(DT->getRootNode())) { + BasicBlock *B = DTN->getBlock(); const auto &BlockRange = assignDFSNumbers(B, ICount); BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } - // Handle forward unreachable blocks and figure out which blocks - // have single preds. - for (auto &B : F) { - // Assign numbers to unreachable blocks. - if (!DFI.nodeVisited(DT->getNode(&B))) { - const auto &BlockRange = assignDFSNumbers(&B, ICount); - BlockInstRange.insert({&B, BlockRange}); - ICount += BlockRange.second - BlockRange.first; - } - } - TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as // that can be quite expensive. At most, we have one expression per diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce0262c97b..659353e912fe 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) { if (Instruction *Inst = dyn_cast<Instruction>(Condition)) { // Third: Check all the users for an invert BasicBlock *Parent = Inst->getParent(); - for (User *U : Condition->users()) - if (Instruction *I = dyn_cast<Instruction>(U)) + for (User *U : Condition->users()) { + if (Instruction *I = dyn_cast<Instruction>(U)) { if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; + } + } + + // Avoid creating a new instruction in the common case of a compare. + if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) { + if (Cmp->hasOneUse()) { + Cmp->setPredicate(Cmp->getInversePredicate()); + return Cmp; + } + } // Last option: Create a new instruction return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 60ae3745c835..9f4d9c7e3981 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. + // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnesValue()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 644d93b727b3..82552684b832 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -112,24 +112,6 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { return buildExtractionBlockSet(BBs.begin(), BBs.end()); } -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector<BasicBlock *> -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. - return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>()); - - const Region &R = *RN.getNodeAs<Region>(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) @@ -143,12 +125,6 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 49b4bd92faf4..089f2b5f3b18 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -85,6 +85,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, UsesToRewrite.clear(); Instruction *I = Worklist.pop_back_val(); + assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); BasicBlock *InstBB = I->getParent(); Loop *L = LI.getLoopFor(InstBB); assert(L && "Instruction belongs to a BB that's not part of a loop"); @@ -96,13 +97,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, if (ExitBlocks.empty()) continue; - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (I->getType()->isTokenTy()) - continue; - for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); @@ -214,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Post process PHI instructions that were inserted into another disjoint // loop and update their exits properly. - for (auto *PostProcessPN : PostProcessPHIs) { - if (PostProcessPN->use_empty()) - continue; - - // Reprocess each PHI instruction. - Worklist.push_back(PostProcessPN); - } + for (auto *PostProcessPN : PostProcessPHIs) + if (!PostProcessPN->use_empty()) + Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have // any uses rewritten. @@ -241,7 +231,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Compute the set of BasicBlocks in the loop `L` dominating at least one exit. static void computeBlocksDominatingExits( Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks, - SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) { + SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { SmallVector<BasicBlock *, 8> BBWorklist; // We start from the exit blocks, as every block trivially dominates itself @@ -279,7 +269,7 @@ static void computeBlocksDominatingExits( if (!L.contains(IDomBB)) continue; - if (BlocksDominatingExits.insert(IDomBB).second) + if (BlocksDominatingExits.insert(IDomBB)) BBWorklist.push_back(IDomBB); } } @@ -293,7 +283,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, if (ExitBlocks.empty()) return false; - SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits; + SmallSetVector<BasicBlock *, 8> BlocksDominatingExits; // We want to avoid use-scanning leveraging dominance informations. // If a block doesn't dominate any of the loop exits, the none of the values @@ -315,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, !isa<PHINode>(I.user_back()))) continue; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I.getType()->isTokenTy()) + continue; + Worklist.push_back(&I); } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 18b29226c2ef..8c5442762643 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1227,13 +1227,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - SmallVector<uint64_t, 1> NewDIExpr; - auto *DIExpr = DDI->getExpression(); - NewDIExpr.push_back(dwarf::DW_OP_deref); - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DIB.createExpression(NewDIExpr), - DDI->getDebugLoc(), CI); + DDI->getExpression(), DDI->getDebugLoc(), + CI); } } DDI->eraseFromParent(); diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 73c14f5606b7..5c21490793e7 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount( "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); +// Designates that a Phi is estimated to become invariant after an "infinite" +// number of loop iterations (i.e. only may become an invariant if the loop is +// fully unrolled). +static const unsigned InfiniteIterationsToInvariance = UINT_MAX; + // Check whether we are capable of peeling this loop. static bool canPeel(Loop *L) { // Make sure the loop is in simplified form @@ -66,10 +71,62 @@ static bool canPeel(Loop *L) { return true; } +// This function calculates the number of iterations after which the given Phi +// becomes an invariant. The pre-calculated values are memorized in the map. The +// function (shortcut is I) is calculated according to the following definition: +// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. +// If %y is a loop invariant, then I(%x) = 1. +// If %y is a Phi from the loop header, I(%x) = I(%y) + 1. +// Otherwise, I(%x) is infinite. +// TODO: Actually if %y is an expression that depends only on Phi %z and some +// loop invariants, we can estimate I(%x) = I(%z) + 1. The example +// looks like: +// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration. +// %y = phi(0, 5), +// %a = %y + 1. +static unsigned calculateIterationsToInvariance( + PHINode *Phi, Loop *L, BasicBlock *BackEdge, + SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) { + assert(Phi->getParent() == L->getHeader() && + "Non-loop Phi should not be checked for turning into invariant."); + assert(BackEdge == L->getLoopLatch() && "Wrong latch?"); + // If we already know the answer, take it from the map. + auto I = IterationsToInvariance.find(Phi); + if (I != IterationsToInvariance.end()) + return I->second; + + // Otherwise we need to analyze the input from the back edge. + Value *Input = Phi->getIncomingValueForBlock(BackEdge); + // Place infinity to map to avoid infinite recursion for cycled Phis. Such + // cycles can never stop on an invariant. + IterationsToInvariance[Phi] = InfiniteIterationsToInvariance; + unsigned ToInvariance = InfiniteIterationsToInvariance; + + if (L->isLoopInvariant(Input)) + ToInvariance = 1u; + else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) { + // Only consider Phis in header block. + if (IncPhi->getParent() != L->getHeader()) + return InfiniteIterationsToInvariance; + // If the input becomes an invariant after X iterations, then our Phi + // becomes an invariant after X + 1 iterations. + unsigned InputToInvariance = calculateIterationsToInvariance( + IncPhi, L, BackEdge, IterationsToInvariance); + if (InputToInvariance != InfiniteIterationsToInvariance) + ToInvariance = InputToInvariance + 1u; + } + + // If we found that this Phi lies in an invariant chain, update the map. + if (ToInvariance != InfiniteIterationsToInvariance) + IterationsToInvariance[Phi] = ToInvariance; + return ToInvariance; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount) { + assert(LoopSize > 0 && "Zero loop size is not allowed!"); UP.PeelCount = 0; if (!canPeel(L)) return; @@ -78,30 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!L->empty()) return; - // Try to find a Phi node that has the same loop invariant as an input from - // its only back edge. If there is such Phi, peeling 1 iteration from the - // loop is profitable, because starting from 2nd iteration we will have an - // invariant instead of this Phi. - if (LoopSize <= UP.Threshold) { + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N + // iterations of the loop. For this we compute the number for iterations after + // which every Phi is guaranteed to become an invariant, and try to peel the + // maximum number of iterations among these values, thus turning all those + // Phis into invariants. + // First, check that we can peel at least one iteration. + if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + // Store the pre-calculated values here. + SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; + // Now go through all Phis to calculate their the number of iterations they + // need to become invariants. + unsigned DesiredPeelCount = 0; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); - BasicBlock *Header = L->getHeader(); - // Iterate over Phis to find one with invariant input on back edge. - bool FoundCandidate = false; - PHINode *Phi; - for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) { - Phi = cast<PHINode>(&*BI); - Value *Input = Phi->getIncomingValueForBlock(BackEdge); - if (L->isLoopInvariant(Input)) { - FoundCandidate = true; - break; - } + for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { + PHINode *Phi = cast<PHINode>(&*BI); + unsigned ToInvariance = calculateIterationsToInvariance( + Phi, L, BackEdge, IterationsToInvariance); + if (ToInvariance != InfiniteIterationsToInvariance) + DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } - if (FoundCandidate) { - DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi - << " because starting from 2nd iteration it is always" - << " an invariant\n"); - UP.PeelCount = 1; + if (DesiredPeelCount > 0) { + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); + // Consider max peel count limitation. + assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); + DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; return; } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 127a44df5344..2f575b9d5027 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3086,7 +3086,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; - if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 4aeea02b1b1b..83bd29dbca65 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -24,6 +24,11 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) return false; + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(StoredVal->getType()) != + DL.isNonIntegralPointerType(LoadTy)) + return false; + return true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 595b2ec88943..7eb8fabe0b2f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -422,7 +422,8 @@ protected: // When we if-convert we need to create edge masks. We have to cache values // so that we don't end up with exponential recursion/IR. typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts> - EdgeMaskCache; + EdgeMaskCacheTy; + typedef DenseMap<BasicBlock *, VectorParts> BlockMaskCacheTy; /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(); @@ -785,7 +786,8 @@ protected: /// Store instructions that should be predicated, as a pair /// <StoreInst, Predicate> SmallVector<std::pair<Instruction *, Value *>, 4> PredicatedInstructions; - EdgeMaskCache MaskCache; + EdgeMaskCacheTy EdgeMaskCache; + BlockMaskCacheTy BlockMaskCache; /// Trip count of the original loop. Value *TripCount; /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) @@ -4560,8 +4562,8 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // Look for cached value. std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst); - EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge); - if (ECEntryIt != MaskCache.end()) + EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge); + if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; VectorParts SrcMask = createBlockInMask(Src); @@ -4580,11 +4582,11 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { for (unsigned part = 0; part < UF; ++part) EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]); - MaskCache[Edge] = EdgeMask; + EdgeMaskCache[Edge] = EdgeMask; return EdgeMask; } - MaskCache[Edge] = SrcMask; + EdgeMaskCache[Edge] = SrcMask; return SrcMask; } @@ -4592,10 +4594,17 @@ InnerLoopVectorizer::VectorParts InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + // Look for cached value. + BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); + if (BCEntryIt != BlockMaskCache.end()) + return BCEntryIt->second; + // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) { Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); - return getVectorValue(C); + const VectorParts &BlockMask = getVectorValue(C); + BlockMaskCache[BB] = BlockMask; + return BlockMask; } // This is the block mask. We OR all incoming edges, and with zero. @@ -4609,6 +4618,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]); } + BlockMaskCache[BB] = BlockMask; return BlockMask; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index da3ac06ab464..554944404708 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4146,8 +4146,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to // reorder a larger list. In fact, we do exactly this when vectorizing - // reductions. However, at this point, we only expect to get here from - // tryToVectorizePair(). + // reductions. However, at this point, we only expect to get here when + // there are exactly two operations. assert(Ops.size() == 2); assert(BuildVectorSlice.empty()); Value *ReorderedOps[] = {Ops[1], Ops[0]}; @@ -4904,7 +4904,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Try to vectorize them. unsigned NumElts = (SameTypeIt - IncIt); DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); - if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { + // The order in which the phi nodes appear in the program does not matter. + // So allow tryToVectorizeList to reorder them if it is beneficial. This + // is done when there are exactly two elements since tryToVectorizeList + // asserts that there are only two values when AllowReorder is true. + bool AllowReorder = NumElts == 2; + if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, + None, AllowReorder)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; |