author    | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
commit    | 12f3ca4cdb95b193af905a00e722a4dcb40b3de3 (patch)
tree      | ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /lib/Transforms/Utils
parent    | d99dafe2e4a385dd2a6c76da6d8258deb100657b (diff)
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/BypassSlowDivision.cpp    |  9
-rw-r--r-- | lib/Transforms/Utils/CodeExtractor.cpp          | 43
-rw-r--r-- | lib/Transforms/Utils/Local.cpp                  | 54
-rw-r--r-- | lib/Transforms/Utils/LoopUnroll.cpp             | 14
-rw-r--r-- | lib/Transforms/Utils/LowerSwitch.cpp            |  8
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp            | 23
-rw-r--r-- | lib/Transforms/Utils/SimplifyInstructions.cpp   | 18
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp       | 38
8 files changed, 134 insertions, 73 deletions
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1cfe3bd53648..7ffdad597a9b 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/Local.h"

 using namespace llvm;
@@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
   unsigned HiBits = LongLen - ShortLen;

   const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
-  APInt Zeros(LongLen, 0), Ones(LongLen, 0);
+  KnownBits Known(LongLen);

-  computeKnownBits(V, Zeros, Ones, DL);
+  computeKnownBits(V, Known, DL);

-  if (Zeros.countLeadingOnes() >= HiBits)
+  if (Known.Zero.countLeadingOnes() >= HiBits)
     return VALRNG_KNOWN_SHORT;

-  if (Ones.countLeadingZeros() < HiBits)
+  if (Known.One.countLeadingZeros() < HiBits)
     return VALRNG_LIKELY_LONG;

   // Long integer divisions are often used in hashtable implementations. It's
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82552684b832..ed72099ec3ed 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
 }

 /// \brief Build a set of blocks to extract if the input blocks are viable.
-template <typename IteratorT>
-static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
-                                                       IteratorT BBEnd) {
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+  assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
   SetVector<BasicBlock *> Result;

-  assert(BBBegin != BBEnd);
-
   // Loop over the blocks, adding them to our set-vector, and aborting with an
   // empty set if we encounter invalid blocks.
-  do {
-    if (!Result.insert(*BBBegin))
-      llvm_unreachable("Repeated basic blocks in extraction input");
+  for (BasicBlock *BB : BBs) {

-    if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) {
+    // If this block is dead, don't process it.
+    if (DT && !DT->isReachableFromEntry(BB))
+      continue;
+
+    if (!Result.insert(BB))
+      llvm_unreachable("Repeated basic blocks in extraction input");
+    if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
       Result.clear();
       return Result;
     }
-  } while (++BBBegin != BBEnd);
+  }

 #ifndef NDEBUG
   for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
@@ -106,23 +108,17 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
   return Result;
 }

-/// \brief Helper to call buildExtractionBlockSet with an ArrayRef.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
-  return buildExtractionBlockSet(BBs.begin(), BBs.end());
-}
-
 CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
                              bool AggregateArgs, BlockFrequencyInfo *BFI,
                              BranchProbabilityInfo *BPI)
     : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+      BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}

 CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
                              BlockFrequencyInfo *BFI,
                              BranchProbabilityInfo *BPI)
     : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
+      BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
       NumExitBlocks(~0U) {}

 /// definedInRegion - Return true if the specified value is defined in the
@@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   // containing PHI nodes merging values from outside of the region, and a
   // second that contains all of the code for the block and merges back any
   // incoming values from inside of the region.
-  BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
-  BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
-                                              Header->getName()+".ce");
+  BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);

   // We only want to code extract the second block now, and it becomes the new
   // header of the region.
@@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   Blocks.insert(NewBB);
   Header = NewBB;

-  // Okay, update dominator sets. The blocks that dominate the new one are the
-  // blocks that dominate TIBB plus the new block itself.
-  if (DT)
-    DT->splitBlock(NewBB);
-
   // Okay, now we need to adjust the PHI nodes and any branches from within the
   // region to go to the new header block instead of the old header block.
   if (NumPredsFromRegion) {
@@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {

   // Okay, everything within the region is now branching to the right block, we
   // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+  BasicBlock::iterator AfterPHIs;
   for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
     PHINode *PN = cast<PHINode>(AfterPHIs);
     // Create a new PHI node in the new region, which has an incoming value
     // from OldPred of PN.
     PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
                                      PN->getName() + ".ce", &NewBB->front());
+    PN->replaceAllUsesWith(NewPN);
     NewPN->addIncoming(PN, OldPred);

     // Loop over all of the incoming value in PN, moving them to NewPN if they
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c5442762643..d3002c5fb750 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
          "getOrEnforceKnownAlignment expects a pointer!");
   unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());

-  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
-  unsigned TrailZ = KnownZero.countTrailingOnes();
+  KnownBits Known(BitWidth);
+  computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
+  unsigned TrailZ = Known.Zero.countTrailingOnes();

   // Avoid trouble with ridiculously large TrailZ values, such as
   // those computed from a null pointer.
@@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
   }
 }

-/// Prepend \p DIExpr with a deref and offset operation.
+enum { WithStackValue = true };
+
+/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+/// into a stack value.
 static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
-                                   bool Deref, int64_t Offset) {
-  if (!Deref && !Offset)
+                                   bool Deref, int64_t Offset = 0,
+                                   bool StackValue = false) {
+  if (!Deref && !Offset && !StackValue)
     return DIExpr;
-  // Create a copy of the original DIDescriptor for user variable, prepending
-  // "deref" operation to a list of address elements, as new llvm.dbg.declare
-  // will take a value storing address of the memory for variable, not
-  // alloca itself.
-  SmallVector<uint64_t, 4> Ops;
+
+  SmallVector<uint64_t, 8> Ops;
+  appendOffset(Ops, Offset);
   if (Deref)
     Ops.push_back(dwarf::DW_OP_deref);
-  appendOffset(Ops, Offset);
   if (DIExpr)
-    Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+    for (auto Op : DIExpr->expr_ops()) {
+      // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
+      if (StackValue) {
+        if (Op.getOp() == dwarf::DW_OP_stack_value)
+          StackValue = false;
+        else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+          Ops.push_back(dwarf::DW_OP_stack_value);
+          StackValue = false;
+        }
+      }
+      Ops.push_back(Op.getOp());
+      for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+        Ops.push_back(Op.getArg(I));
+    }
+  if (StackValue)
+    Ops.push_back(dwarf::DW_OP_stack_value);
   return Builder.createExpression(Ops);
 }

@@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) {
     unsigned BitWidth =
         M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
     APInt Offset(BitWidth, 0);
-    // Rewrite a constant GEP into a DIExpression.
+    // Rewrite a constant GEP into a DIExpression. Since we are performing
+    // arithmetic to compute the variable's *value* in the DIExpression, we
+    // need to mark the expression with a DW_OP_stack_value.
     if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
       auto *DIExpr = DVI->getExpression();
       DIBuilder DIB(M, /*AllowUnresolved*/ false);
-      // GEP offsets are i32 and thus alwaus fit into an int64_t.
-      DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+      // GEP offsets are i32 and thus always fit into an int64_t.
+      DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
+                             WithStackValue);
       DVI->setOperand(0, MDWrap(I.getOperand(0)));
       DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
       DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
       // Rewrite the load into DW_OP_deref.
       auto *DIExpr = DVI->getExpression();
       DIBuilder DIB(M, /*AllowUnresolved*/ false);
-      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
       DVI->setOperand(0, MDWrap(I.getOperand(0)));
       DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
       DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 3c669ce644e2..43ab725b0769 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }

+  // The current loop unroll pass can only unroll loops with a single latch
+  // that's a conditional branch exiting the loop.
+  // FIXME: The implementation can be extended to work with more complicated
+  // cases, e.g. loops with multiple latches.
   BasicBlock *Header = L->getHeader();
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

@@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }

+  auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+    return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+  };
+
+  if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+    DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+                    " exiting the loop can be unrolled\n");
+    return false;
+  }
+
   if (Header->hasAddressTaken()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
     DEBUG(dbgs() <<
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index b375d51005d5..8959e77438e9 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
   Value *Val = SI->getCondition();  // The value we are switching on...
   BasicBlock* Default = SI->getDefaultDest();

+  // Don't handle unreachable blocks. If there are successors with phis, this
+  // would leave them behind with missing predecessors.
+  if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
+      CurBlock->getSinglePredecessor() == CurBlock) {
+    DeleteList.insert(CurBlock);
+    return;
+  }
+
   // If there is only the default destination, just branch.
   if (!SI->getNumCases()) {
     BranchInst::Create(Default, CurBlock);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2f575b9d5027..f86e97b6cc72 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
   BasicBlock *QFB = QBI->getSuccessor(1);
   BasicBlock *PostBB = QFB->getSingleSuccessor();

+  // Make sure we have a good guess for PostBB. If QTB's only successor is
+  // QFB, then QFB is a better PostBB.
+  if (QTB->getSingleSuccessor() == QFB)
+    PostBB = QFB;
+
+  // If we couldn't find a good PostBB, stop.
+  if (!PostBB)
+    return false;
+
   bool InvertPCond = false, InvertQCond = false;
   // Canonicalize fallthroughs to the true branches.
   if (PFB == QBI->getParent()) {
@@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
   auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
     return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
   };
-  if (!PostBB ||
-      !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
       !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
     return false;
   if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
@@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
     if (!isa<DbgInfoIntrinsic>(I))
       return false;

-  SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
   auto *PhiLPInst = cast<PHINode>(RI->getValue());

   // Check incoming blocks to see if any of them are trivial.
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
                                      const DataLayout &DL) {
   Value *Cond = SI->getCondition();
   unsigned Bits = Cond->getType()->getIntegerBitWidth();
-  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
-  computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
+  KnownBits Known(Bits);
+  computeKnownBits(Cond, Known, DL, 0, AC, SI);

   // We can also eliminate cases by determining that their values are outside of
   // the limited range of the condition based on how many significant (non-sign)
@@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
   SmallVector<ConstantInt *, 8> DeadCases;
   for (auto &Case : SI->cases()) {
     APInt CaseVal = Case.getCaseValue()->getValue();
-    if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
+    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
         (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
       DeadCases.push_back(Case.getCaseValue());
       DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
@@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
   bool HasDefault =
       !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
   const unsigned NumUnknownBits =
-      Bits - (KnownZero | KnownOne).countPopulation();
+      Bits - (Known.Zero | Known.One).countPopulation();
   assert(NumUnknownBits <= Bits);
   if (HasDefault && DeadCases.empty() &&
       NumUnknownBits < 64 /* avoid overflow */ &&
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f6070868de44..27373427d4f7 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -35,10 +35,8 @@ using namespace llvm;

 STATISTIC(NumSimplified, "Number of redundant instructions removed");

-static bool runImpl(Function &F, const DominatorTree *DT,
-                    const TargetLibraryInfo *TLI, AssumptionCache *AC,
+static bool runImpl(Function &F, const SimplifyQuery &SQ,
                     OptimizationRemarkEmitter *ORE) {
-  const DataLayout &DL = F.getParent()->getDataLayout();
   SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
   bool Changed = false;

@@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT,

       // Don't waste time simplifying unused instructions.
       if (!I->use_empty()) {
-        if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
+        if (Value *V =
+                SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
           // Mark all uses for resimplification next time round the loop.
           for (User *U : I->users())
             Next->insert(cast<Instruction>(U));
@@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
           Changed = true;
         }
       }
-      if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+      if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
         // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
         // instruction, so simply incrementing the iterator does not work.
        // When instructions get deleted re-iterate instead.
@@ -113,8 +112,9 @@ namespace {
           &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
       OptimizationRemarkEmitter *ORE =
           &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
-      return runImpl(F, DT, TLI, AC, ORE);
+      const DataLayout &DL = F.getParent()->getDataLayout();
+      const SimplifyQuery SQ(DL, TLI, DT, AC);
+      return runImpl(F, SQ, ORE);
     }
   };
 }
@@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
+  bool Changed = runImpl(F, SQ, &ORE);
   if (!Changed)
     return PreservedAnalyses::all();

diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index aa71e3669ea2..2c1c30463a23 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"

@@ -37,10 +38,6 @@ using namespace llvm;
 using namespace PatternMatch;

 static cl::opt<bool>
-    ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden,
-                   cl::desc("Treat error-reporting calls as cold"));
-
-static cl::opt<bool>
     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
                          cl::init(false),
                          cl::desc("Enable unsafe double to float "
@@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
       Value *Offset = GEP->getOperand(2);
       unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
-      APInt KnownZero(BitWidth, 0);
-      APInt KnownOne(BitWidth, 0);
-      computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
-                       nullptr);
-      KnownZero.flipAllBits();
+      KnownBits Known(BitWidth);
+      computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
+      Known.Zero.flipAllBits();
       size_t ArrSize =
              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();

@@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
       // optimize if we can prove that the program has undefined behavior when
       // Offset is outside that range. That is the case when GEP->getOperand(0)
       // is a pointer to an object whose memory extent is NullTermIdx+1.
-      if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+      if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
           (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
            NullTermIdx == ArrSize - 1))
         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
@@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,

   // Is the inner call really malloc()?
   Function *InnerCallee = Malloc->getCalledFunction();
+  if (!InnerCallee)
+    return nullptr;
+
   LibFunc Func;
   if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
       Func != LibFunc_malloc)
@@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
   if (V == nullptr)
     return nullptr;

+  // If call isn't an intrinsic, check that it isn't within a function with the
+  // same name as the float version of this call.
+  //
+  // e.g. inline float expf(float val) { return (float) exp((double) val); }
+  //
+  // A similar such definition exists in the MinGW-w64 math.h header file which
+  // when compiled with -O2 -ffast-math causes the generation of infinite loops
+  // where expf is called.
+  if (!Callee->isIntrinsic()) {
+    const Function *F = CI->getFunction();
+    StringRef FName = F->getName();
+    StringRef CalleeName = Callee->getName();
+    if ((FName.size() == (CalleeName.size() + 1)) &&
+        (FName.back() == 'f') &&
+        FName.startswith(CalleeName))
+      return nullptr;
+  }
+
   // Propagate fast-math flags from the existing call to the new call.
   IRBuilder<>::FastMathFlagGuard Guard(B);
   B.setFastMathFlags(CI->getFastMathFlags());
@@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
 }

 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
-  if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
+  if (!Callee || !Callee->isDeclaration())
     return false;

   if (StreamArg < 0)