author    | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
commit    | 12f3ca4cdb95b193af905a00e722a4dcb40b3de3 (patch)
tree      | ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /lib/Transforms/Utils
parent    | d99dafe2e4a385dd2a6c76da6d8258deb100657b (diff)
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/BypassSlowDivision.cpp    |  9
-rw-r--r-- | lib/Transforms/Utils/CodeExtractor.cpp          | 43
-rw-r--r-- | lib/Transforms/Utils/Local.cpp                  | 54
-rw-r--r-- | lib/Transforms/Utils/LoopUnroll.cpp             | 14
-rw-r--r-- | lib/Transforms/Utils/LowerSwitch.cpp            |  8
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp            | 23
-rw-r--r-- | lib/Transforms/Utils/SimplifyInstructions.cpp   | 18
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp       | 38
8 files changed, 134 insertions, 73 deletions
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1cfe3bd53648..7ffdad597a9b 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/Local.h"

 using namespace llvm;
@@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
   unsigned HiBits = LongLen - ShortLen;

   const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
-  APInt Zeros(LongLen, 0), Ones(LongLen, 0);
+  KnownBits Known(LongLen);

-  computeKnownBits(V, Zeros, Ones, DL);
+  computeKnownBits(V, Known, DL);

-  if (Zeros.countLeadingOnes() >= HiBits)
+  if (Known.Zero.countLeadingOnes() >= HiBits)
     return VALRNG_KNOWN_SHORT;

-  if (Ones.countLeadingZeros() < HiBits)
+  if (Known.One.countLeadingZeros() < HiBits)
     return VALRNG_LIKELY_LONG;

   // Long integer divisions are often used in hashtable implementations. It's
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82552684b832..ed72099ec3ed 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
 }

 /// \brief Build a set of blocks to extract if the input blocks are viable.
-template <typename IteratorT>
-static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
-                                                       IteratorT BBEnd) {
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+  assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
   SetVector<BasicBlock *> Result;

-  assert(BBBegin != BBEnd);
-
   // Loop over the blocks, adding them to our set-vector, and aborting with an
   // empty set if we encounter invalid blocks.
-  do {
-    if (!Result.insert(*BBBegin))
-      llvm_unreachable("Repeated basic blocks in extraction input");
+  for (BasicBlock *BB : BBs) {

-    if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) {
+    // If this block is dead, don't process it.
+    if (DT && !DT->isReachableFromEntry(BB))
+      continue;
+
+    if (!Result.insert(BB))
+      llvm_unreachable("Repeated basic blocks in extraction input");
+    if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
       Result.clear();
       return Result;
     }
-  } while (++BBBegin != BBEnd);
+  }

 #ifndef NDEBUG
   for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
@@ -106,23 +108,17 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
   return Result;
 }

-/// \brief Helper to call buildExtractionBlockSet with an ArrayRef.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
-  return buildExtractionBlockSet(BBs.begin(), BBs.end());
-}
-
 CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
                              bool AggregateArgs, BlockFrequencyInfo *BFI,
                              BranchProbabilityInfo *BPI)
     : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+      BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}

 CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
                              BlockFrequencyInfo *BFI,
                              BranchProbabilityInfo *BPI)
     : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
+      BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
       NumExitBlocks(~0U) {}

 /// definedInRegion - Return true if the specified value is defined in the
@@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   // containing PHI nodes merging values from outside of the region, and a
   // second that contains all of the code for the block and merges back any
   // incoming values from inside of the region.
-  BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
-  BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
-                                              Header->getName()+".ce");
+  BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);

   // We only want to code extract the second block now, and it becomes the new
   // header of the region.
@@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   Blocks.insert(NewBB);
   Header = NewBB;

-  // Okay, update dominator sets. The blocks that dominate the new one are the
-  // blocks that dominate TIBB plus the new block itself.
-  if (DT)
-    DT->splitBlock(NewBB);
-
   // Okay, now we need to adjust the PHI nodes and any branches from within the
   // region to go to the new header block instead of the old header block.
   if (NumPredsFromRegion) {
@@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {

   // Okay, everything within the region is now branching to the right block, we
   // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+  BasicBlock::iterator AfterPHIs;
   for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
     PHINode *PN = cast<PHINode>(AfterPHIs);
     // Create a new PHI node in the new region, which has an incoming value
     // from OldPred of PN.
     PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
                                      PN->getName() + ".ce", &NewBB->front());
+    PN->replaceAllUsesWith(NewPN);
     NewPN->addIncoming(PN, OldPred);

     // Loop over all of the incoming value in PN, moving them to NewPN if they
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c5442762643..d3002c5fb750 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
          "getOrEnforceKnownAlignment expects a pointer!");
   unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());

-  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
-  unsigned TrailZ = KnownZero.countTrailingOnes();
+  KnownBits Known(BitWidth);
+  computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
+  unsigned TrailZ = Known.Zero.countTrailingOnes();

   // Avoid trouble with ridiculously large TrailZ values, such as
   // those computed from a null pointer.
@@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
   }
 }

-/// Prepend \p DIExpr with a deref and offset operation.
+enum { WithStackValue = true };
+
+/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+/// into a stack value.
 static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
-                                   bool Deref, int64_t Offset) {
-  if (!Deref && !Offset)
+                                   bool Deref, int64_t Offset = 0,
+                                   bool StackValue = false) {
+  if (!Deref && !Offset && !StackValue)
     return DIExpr;
-  // Create a copy of the original DIDescriptor for user variable, prepending
-  // "deref" operation to a list of address elements, as new llvm.dbg.declare
-  // will take a value storing address of the memory for variable, not
-  // alloca itself.
-  SmallVector<uint64_t, 4> Ops;
+
+  SmallVector<uint64_t, 8> Ops;
+  appendOffset(Ops, Offset);
   if (Deref)
     Ops.push_back(dwarf::DW_OP_deref);
-  appendOffset(Ops, Offset);
   if (DIExpr)
-    Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+    for (auto Op : DIExpr->expr_ops()) {
+      // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
+      if (StackValue) {
+        if (Op.getOp() == dwarf::DW_OP_stack_value)
+          StackValue = false;
+        else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+          Ops.push_back(dwarf::DW_OP_stack_value);
+          StackValue = false;
+        }
+      }
+      Ops.push_back(Op.getOp());
+      for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+        Ops.push_back(Op.getArg(I));
+    }
+  if (StackValue)
+    Ops.push_back(dwarf::DW_OP_stack_value);
   return Builder.createExpression(Ops);
 }

@@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) {
     unsigned BitWidth =
         M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
     APInt Offset(BitWidth, 0);
-    // Rewrite a constant GEP into a DIExpression.
+    // Rewrite a constant GEP into a DIExpression. Since we are performing
+    // arithmetic to compute the variable's *value* in the DIExpression, we
+    // need to mark the expression with a DW_OP_stack_value.
     if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
       auto *DIExpr = DVI->getExpression();
       DIBuilder DIB(M, /*AllowUnresolved*/ false);
-      // GEP offsets are i32 and thus alwaus fit into an int64_t.
-      DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+      // GEP offsets are i32 and thus always fit into an int64_t.
+      DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
+                             WithStackValue);
       DVI->setOperand(0, MDWrap(I.getOperand(0)));
       DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
       DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
       // Rewrite the load into DW_OP_deref.
       auto *DIExpr = DVI->getExpression();
       DIBuilder DIB(M, /*AllowUnresolved*/ false);
-      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
       DVI->setOperand(0, MDWrap(I.getOperand(0)));
       DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
       DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 3c669ce644e2..43ab725b0769 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }

+  // The current loop unroll pass can only unroll loops with a single latch
+  // that's a conditional branch exiting the loop.
+  // FIXME: The implementation can be extended to work with more complicated
+  // cases, e.g. loops with multiple latches.
   BasicBlock *Header = L->getHeader();
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

@@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }

+  auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+    return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+  };
+
+  if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+    DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+                    " exiting the loop can be unrolled\n");
+    return false;
+  }
+
   if (Header->hasAddressTaken()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
     DEBUG(dbgs() <<
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index b375d51005d5..8959e77438e9 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
   Value *Val = SI->getCondition();  // The value we are switching on...
   BasicBlock* Default = SI->getDefaultDest();

+  // Don't handle unreachable blocks. If there are successors with phis, this
+  // would leave them behind with missing predecessors.
+  if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
+      CurBlock->getSinglePredecessor() == CurBlock) {
+    DeleteList.insert(CurBlock);
+    return;
+  }
+
   // If there is only the default destination, just branch.
   if (!SI->getNumCases()) {
     BranchInst::Create(Default, CurBlock);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2f575b9d5027..f86e97b6cc72 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
   BasicBlock *QFB = QBI->getSuccessor(1);
   BasicBlock *PostBB = QFB->getSingleSuccessor();

+  // Make sure we have a good guess for PostBB. If QTB's only successor is
+  // QFB, then QFB is a better PostBB.
+  if (QTB->getSingleSuccessor() == QFB)
+    PostBB = QFB;
+
+  // If we couldn't find a good PostBB, stop.
+  if (!PostBB)
+    return false;
+
   bool InvertPCond = false, InvertQCond = false;
   // Canonicalize fallthroughs to the true branches.
   if (PFB == QBI->getParent()) {
@@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
   auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
     return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
   };
-  if (!PostBB ||
-      !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
       !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
     return false;
   if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
@@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
     if (!isa<DbgInfoIntrinsic>(I))
       return false;

-  SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
   auto *PhiLPInst = cast<PHINode>(RI->getValue());

   // Check incoming blocks to see if any of them are trivial.
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
                                      const DataLayout &DL) {
   Value *Cond = SI->getCondition();
   unsigned Bits = Cond->getType()->getIntegerBitWidth();
-  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
-  computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
+  KnownBits Known(Bits);
+  computeKnownBits(Cond, Known, DL, 0, AC, SI);

   // We can also eliminate cases by determining that their values are outside of
   // the limited range of the condition based on how many significant (non-sign)
@@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
   SmallVector<ConstantInt *, 8> DeadCases;
   for (auto &Case : SI->cases()) {
     APInt CaseVal = Case.getCaseValue()->getValue();
-    if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
+    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
         (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
       DeadCases.push_back(Case.getCaseValue());
       DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
@@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
   bool HasDefault =
       !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
   const unsigned NumUnknownBits =
-      Bits - (KnownZero | KnownOne).countPopulation();
+      Bits - (Known.Zero | Known.One).countPopulation();
   assert(NumUnknownBits <= Bits);
   if (HasDefault && DeadCases.empty() &&
       NumUnknownBits < 64 /* avoid overflow */ &&
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f6070868de44..27373427d4f7 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -35,10 +35,8 @@ using namespace llvm;

 STATISTIC(NumSimplified, "Number of redundant instructions removed");

-static bool runImpl(Function &F, const DominatorTree *DT,
-                    const TargetLibraryInfo *TLI, AssumptionCache *AC,
+static bool runImpl(Function &F, const SimplifyQuery &SQ,
                     OptimizationRemarkEmitter *ORE) {
-  const DataLayout &DL = F.getParent()->getDataLayout();
   SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
   bool Changed = false;

@@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT,

       // Don't waste time simplifying unused instructions.
       if (!I->use_empty()) {
-        if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
+        if (Value *V =
+                SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
           // Mark all uses for resimplification next time round the loop.
           for (User *U : I->users())
             Next->insert(cast<Instruction>(U));
@@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
           Changed = true;
         }
       }
-      if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+      if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
         // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
         // instruction, so simply incrementing the iterator does not work.
        // When instructions get deleted re-iterate instead.
@@ -113,8 +112,9 @@ namespace {
           &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
       OptimizationRemarkEmitter *ORE =
           &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
-      return runImpl(F, DT, TLI, AC, ORE);
+      const DataLayout &DL = F.getParent()->getDataLayout();
+      const SimplifyQuery SQ(DL, TLI, DT, AC);
+      return runImpl(F, SQ, ORE);
     }
   };
 }
@@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
+  bool Changed = runImpl(F, SQ, &ORE);
   if (!Changed)
     return PreservedAnalyses::all();

diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index aa71e3669ea2..2c1c30463a23 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"

@@ -37,10 +38,6 @@ using namespace llvm;
 using namespace PatternMatch;

 static cl::opt<bool>
-    ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden,
-                   cl::desc("Treat error-reporting calls as cold"));
-
-static cl::opt<bool>
     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
                          cl::init(false),
                          cl::desc("Enable unsafe double to float "
@@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
       Value *Offset = GEP->getOperand(2);
       unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
-      APInt KnownZero(BitWidth, 0);
-      APInt KnownOne(BitWidth, 0);
-      computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
-                       nullptr);
-      KnownZero.flipAllBits();
+      KnownBits Known(BitWidth);
+      computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
+      Known.Zero.flipAllBits();
       size_t ArrSize =
              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();

@@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
       // optimize if we can prove that the program has undefined behavior when
       // Offset is outside that range. That is the case when GEP->getOperand(0)
       // is a pointer to an object whose memory extent is NullTermIdx+1.
-      if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+      if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
           (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
            NullTermIdx == ArrSize - 1))
         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
@@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,

   // Is the inner call really malloc()?
   Function *InnerCallee = Malloc->getCalledFunction();
+  if (!InnerCallee)
+    return nullptr;
+
   LibFunc Func;
   if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
       Func != LibFunc_malloc)
@@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
   if (V == nullptr)
     return nullptr;

+  // If call isn't an intrinsic, check that it isn't within a function with the
+  // same name as the float version of this call.
+  //
+  // e.g. inline float expf(float val) { return (float) exp((double) val); }
+  //
+  // A similar such definition exists in the MinGW-w64 math.h header file which
+  // when compiled with -O2 -ffast-math causes the generation of infinite loops
+  // where expf is called.
+  if (!Callee->isIntrinsic()) {
+    const Function *F = CI->getFunction();
+    StringRef FName = F->getName();
+    StringRef CalleeName = Callee->getName();
+    if ((FName.size() == (CalleeName.size() + 1)) &&
+        (FName.back() == 'f') &&
+        FName.startswith(CalleeName))
+      return nullptr;
+  }
+
   // Propagate fast-math flags from the existing call to the new call.
   IRBuilder<>::FastMathFlagGuard Guard(B);
   B.setFastMathFlags(CI->getFastMathFlags());
@@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
 }

 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
-  if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
+  if (!Callee || !Callee->isDeclaration())
     return false;

   if (StreamArg < 0)