| author | Dimitry Andric <dim@FreeBSD.org> | 2016-01-22 21:16:09 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-01-22 21:16:09 +0000 | 
| commit | dadbdfff07596fc3b48cc1e735181b9b8c893f67 (patch) | |
| tree | b27dffbc94bfeb477e1ff5e484d34d409ec05813 /lib/Transforms | |
| parent | dfab1a98e00a97e03f1d59a3cbdc33ced8622a9c (diff) | |
Diffstat (limited to 'lib/Transforms')
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 187 |
| -rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 331 |
| -rw-r--r-- | lib/Transforms/Utils/Local.cpp | 235 |
| -rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 55 |
4 files changed, 583 insertions, 225 deletions
| diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 95c50d32c820..76cefd97cd8f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -17,6 +17,7 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  using namespace PatternMatch; @@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    return Changed ? &I : nullptr;  } - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// (value, bitnumber) to bitnumber. It is the caller's responsibility to -/// validate that all `value`s are identical and that the bitnumber to bitnumber -/// mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted, -/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7]. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bits that the subexpression is later shifted.  For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -16 on entry.  This is used to specify which -/// bits of BitValues are actually being set. -/// -/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding -/// bit is masked to zero by a user.  For example, in (X & 255), X will be -/// processed with a bytemask of 255. BitMask is always in the local -/// (OverallLeftShift) coordinate space. -/// -static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, -                            SmallVectorImpl<Value *> &BitValues, -                            SmallVectorImpl<int> &BitProvenance) { -  if (Instruction *I = dyn_cast<Instruction>(V)) { -    // If this is an or instruction, it may be an inner node of the bswap. -    if (I->getOpcode() == Instruction::Or) -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance) || -             CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); - -    // If this is a logical shift by a constant, recurse with OverallLeftShift -    // and BitMask adjusted. -    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { -      unsigned ShAmt = -          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); -      // Ensure the shift amount is defined. 
-      if (ShAmt > BitValues.size()) -        return true; - -      unsigned BitShift = ShAmt; -      if (I->getOpcode() == Instruction::Shl) { -        // X << C -> collect(X, +C) -        OverallLeftShift += BitShift; -        BitMask = BitMask.lshr(BitShift); -      } else { -        // X >>u C -> collect(X, -C) -        OverallLeftShift -= BitShift; -        BitMask = BitMask.shl(BitShift); -      } - -      if (OverallLeftShift >= (int)BitValues.size()) -        return true; -      if (OverallLeftShift <= -(int)BitValues.size()) -        return true; - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } - -    // If this is a logical 'and' with a mask that clears bits, clear the -    // corresponding bits in BitMask. -    if (I->getOpcode() == Instruction::And && -        isa<ConstantInt>(I->getOperand(1))) { -      unsigned NumBits = BitValues.size(); -      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); -      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - -      for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) { -        // If this bit is masked out by a later operation, we don't care what -        // the and mask is. -        if (BitMask[i] == 0) -          continue; - -        // If the AndMask is zero for this bit, clear the bit. -        APInt MaskB = AndMask & Bit; -        if (MaskB == 0) { -          BitMask.clearBit(i); -          continue; -        } - -        // Otherwise, this bit is kept. -      } - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } -  } - -  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be -  // the input value to the bswap/bitreverse. To be part of a bswap or -  // bitreverse we must be demanding a contiguous range of bits from it. -  unsigned InputBitLen = BitMask.countPopulation(); -  unsigned InputBitNo = BitMask.countTrailingZeros(); -  if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != -      InputBitLen) -    // Not a contiguous set range of bits! -    return true; - -  // We know we're moving a contiguous range of bits from the input to the -  // output. Record which bits in the output came from which bits in the input. -  unsigned DestBitNo = InputBitNo + OverallLeftShift; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitProvenance[DestBitNo + I] = InputBitNo + I; - -  // If the destination bit value is already defined, the values are or'd -  // together, which isn't a bswap/bitreverse (unless it's an or of the same -  // bits). -  if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) -    return true; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitValues[DestBitNo + I] = V; - -  return false; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, -                                          unsigned BitWidth) { -  if (From % 8 != To % 8) -    return false; -  // Convert from bit indices to byte indices and check for a byte reversal. -  From >>= 3; -  To >>= 3; -  BitWidth >>= 3; -  return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, -                                               unsigned BitWidth) { -  return From == BitWidth - To - 1; -} -  /// Given an OR instruction, check to see if this is a bswap or bitreverse  /// idiom. If so, insert the new intrinsic and return it.  
Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) { -  IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); -  if (!ITy) -    return nullptr;   // Can't do vectors. -  unsigned BW = ITy->getBitWidth(); -   -  /// We keep track of which bit (BitProvenance) inside which value (BitValues) -  /// defines each bit in the result. -  SmallVector<Value *, 8> BitValues(BW, nullptr); -  SmallVector<int, 8> BitProvenance(BW, -1); -   -  // Try to find all the pieces corresponding to the bswap. -  APInt BitMask = APInt::getAllOnesValue(BitValues.size()); -  if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance)) -    return nullptr; - -  // Check to see if all of the bits come from the same value. -  Value *V = BitValues[0]; -  if (!V) return nullptr;  // Didn't find a bit?  Must be zero. - -  if (!std::all_of(BitValues.begin(), BitValues.end(), -                   [&](const Value *X) { return X == V; })) -    return nullptr; - -  // Now, is the bit permutation correct for a bswap or a bitreverse? We can -  // only byteswap values with an even number of bytes. -  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; -  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { -    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); -    OKForBitReverse &= -        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); -  } - -  Intrinsic::ID Intrin; -  if (OKForBSwap) -    Intrin = Intrinsic::bswap; -  else if (OKForBitReverse) -    Intrin = Intrinsic::bitreverse; -  else +  SmallVector<Instruction*, 4> Insts; +  if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts))      return nullptr; +  Instruction *LastInst = Insts.pop_back_val(); +  LastInst->removeFromParent(); -  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy); -  return CallInst::Create(F, V); +  for (auto *Inst : Insts) +    Worklist.Add(Inst); +  return LastInst;  }  /// We have an expression of the form (A&C)|(B&D).  Check if A is (cond?-1:0) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 14574119b9a8..79282a2a703b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(    RI->eraseFromParent();  } +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { +  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) +    return FPI->getParentPad(); +  return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. +static Value *getUnwindDestTokenHelper(Instruction *EHPad, +                                       UnwindDestMemoTy &MemoMap) { +  SmallVector<Instruction *, 8> Worklist(1, EHPad); + +  while (!Worklist.empty()) { +    Instruction *CurrentPad = Worklist.pop_back_val(); +    // We only put pads on the worklist that aren't in the MemoMap.  When +    // we find an unwind dest for a pad we may update its ancestors, but +    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, +    // so they should never get updated while queued on the worklist. 
+    assert(!MemoMap.count(CurrentPad)); +    Value *UnwindDestToken = nullptr; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { +      if (CatchSwitch->hasUnwindDest()) { +        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); +      } else { +        // Catchswitch doesn't have a 'nounwind' variant, and one might be +        // annotated as "unwinds to caller" when really it's nounwind (see +        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the +        // parent's unwind dest from this.  We can check its catchpads' +        // descendants, since they might include a cleanuppad with an +        // "unwinds to caller" cleanupret, which can be trusted. +        for (auto HI = CatchSwitch->handler_begin(), +                  HE = CatchSwitch->handler_end(); +             HI != HE && !UnwindDestToken; ++HI) { +          BasicBlock *HandlerBlock = *HI; +          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); +          for (User *Child : CatchPad->users()) { +            // Intentionally ignore invokes here -- since the catchswitch is +            // marked "unwind to caller", it would be a verifier error if it +            // contained an invoke which unwinds out of it, so any invoke we'd +            // encounter must unwind to some child of the catch. +            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) +              continue; + +            Instruction *ChildPad = cast<Instruction>(Child); +            auto Memo = MemoMap.find(ChildPad); +            if (Memo == MemoMap.end()) { +              // Haven't figured out this child pad yet; queue it. +              Worklist.push_back(ChildPad); +              continue; +            } +            // We've already checked this child, but might have found that +            // it offers no proof either way. +            Value *ChildUnwindDestToken = Memo->second; +            if (!ChildUnwindDestToken) +              continue; +            // We already know the child's unwind dest, which can either +            // be ConstantTokenNone to indicate unwind to caller, or can +            // be another child of the catchpad.  Only the former indicates +            // the unwind dest of the catchswitch. +            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { +              UnwindDestToken = ChildUnwindDestToken; +              break; +            } +            assert(getParentPad(ChildUnwindDestToken) == CatchPad); +          } +        } +      } +    } else { +      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); +      for (User *U : CleanupPad->users()) { +        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { +          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) +            UnwindDestToken = RetUnwindDest->getFirstNonPHI(); +          else +            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); +          break; +        } +        Value *ChildUnwindDestToken; +        if (auto *Invoke = dyn_cast<InvokeInst>(U)) { +          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); +        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { +          Instruction *ChildPad = cast<Instruction>(U); +          auto Memo = MemoMap.find(ChildPad); +          if (Memo == MemoMap.end()) { +            // Haven't resolved this child yet; queue it and keep searching. 
+            Worklist.push_back(ChildPad); +            continue; +          } +          // We've checked this child, but still need to ignore it if it +          // had no proof either way. +          ChildUnwindDestToken = Memo->second; +          if (!ChildUnwindDestToken) +            continue; +        } else { +          // Not a relevant user of the cleanuppad +          continue; +        } +        // In a well-formed program, the child/invoke must either unwind to +        // an(other) child of the cleanup, or exit the cleanup.  In the +        // first case, continue searching. +        if (isa<Instruction>(ChildUnwindDestToken) && +            getParentPad(ChildUnwindDestToken) == CleanupPad) +          continue; +        UnwindDestToken = ChildUnwindDestToken; +        break; +      } +    } +    // If we haven't found an unwind dest for CurrentPad, we may have queued its +    // children, so move on to the next in the worklist. +    if (!UnwindDestToken) +      continue; + +    // Now we know that CurrentPad unwinds to UnwindDestToken.  It also exits +    // any ancestors of CurrentPad up to but not including UnwindDestToken's +    // parent pad.  Record this in the memo map, and check to see if the +    // original EHPad being queried is one of the ones exited. +    Value *UnwindParent; +    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) +      UnwindParent = getParentPad(UnwindPad); +    else +      UnwindParent = nullptr; +    bool ExitedOriginalPad = false; +    for (Instruction *ExitedPad = CurrentPad; +         ExitedPad && ExitedPad != UnwindParent; +         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { +      // Skip over catchpads since they just follow their catchswitches. +      if (isa<CatchPadInst>(ExitedPad)) +        continue; +      MemoMap[ExitedPad] = UnwindDestToken; +      ExitedOriginalPad |= (ExitedPad == EHPad); +    } + +    if (ExitedOriginalPad) +      return UnwindDestToken; + +    // Continue the search. +  } + +  // No definitive information is contained within this funclet. +  return nullptr; +} + +/// Given an EH pad, find where it unwinds.  If it unwinds to an EH pad, +/// return that pad instruction.  If it unwinds to caller, return +/// ConstantTokenNone.  If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke.  Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer.  Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees.  The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. 
+static Value *getUnwindDestToken(Instruction *EHPad, +                                 UnwindDestMemoTy &MemoMap) { +  // Catchpads unwind to the same place as their catchswitch; +  // redirect any queries on catchpads so the code below can +  // deal with just catchswitches and cleanuppads. +  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) +    EHPad = CPI->getCatchSwitch(); + +  // Check if we've already determined the unwind dest for this pad. +  auto Memo = MemoMap.find(EHPad); +  if (Memo != MemoMap.end()) +    return Memo->second; + +  // Search EHPad and, if necessary, its descendants. +  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); +  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); +  if (UnwindDestToken) +    return UnwindDestToken; + +  // No information is available for this EHPad from itself or any of its +  // descendants.  An unwind all the way out to a pad in the caller would +  // need also to agree with the unwind dest of the parent funclet, so +  // search up the chain to try to find a funclet with information.  Put +  // null entries in the memo map to avoid re-processing as we go up. +  MemoMap[EHPad] = nullptr; +  Instruction *LastUselessPad = EHPad; +  Value *AncestorToken; +  for (AncestorToken = getParentPad(EHPad); +       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); +       AncestorToken = getParentPad(AncestorToken)) { +    // Skip over catchpads since they just follow their catchswitches. +    if (isa<CatchPadInst>(AncestorPad)) +      continue; +    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); +    auto AncestorMemo = MemoMap.find(AncestorPad); +    if (AncestorMemo == MemoMap.end()) { +      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); +    } else { +      UnwindDestToken = AncestorMemo->second; +    } +    if (UnwindDestToken) +      break; +    LastUselessPad = AncestorPad; +  } + +  // Since the whole tree under LastUselessPad has no information, it all must +  // match UnwindDestToken; record that to avoid repeating the search. +  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); +  while (!Worklist.empty()) { +    Instruction *UselessPad = Worklist.pop_back_val(); +    assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr); +    MemoMap[UselessPad] = UnwindDestToken; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { +      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) +        for (User *U : HandlerBlock->getFirstNonPHI()->users()) +          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +            Worklist.push_back(cast<Instruction>(U)); +    } else { +      assert(isa<CleanupPadInst>(UselessPad)); +      for (User *U : UselessPad->users()) +        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +          Worklist.push_back(cast<Instruction>(U)); +    } +  } + +  return UnwindDestToken; +} +  /// When we inline a basic block into an invoke,  /// we have to turn all of the calls that can throw into invokes.  /// This function analyzes BB to see if there are any calls, and if so,  /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI  /// nodes in that block with the values specified in InvokeDestPHIValues. 
-static BasicBlock * -HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { +static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( +    BasicBlock *BB, BasicBlock *UnwindEdge, +    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {      Instruction *I = &*BBI++; @@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {      if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))        continue; +    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { +      // This call is nested inside a funclet.  If that funclet has an unwind +      // destination within the inlinee, then unwinding out of this call would +      // be UB.  Rewriting this call to an invoke which targets the inlined +      // invoke's unwind dest would give the call's parent funclet multiple +      // unwind destinations, which is something that subsequent EH table +      // generation can't handle and that the verifier rejects.  So when we +      // see such a call, leave it as a call. +      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); +      Value *UnwindDestToken = +          getUnwindDestToken(FuncletPad, *FuncletUnwindMap); +      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +        continue; +#ifndef NDEBUG +      Instruction *MemoKey; +      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) +        MemoKey = CatchPad->getCatchSwitch(); +      else +        MemoKey = FuncletPad; +      assert(FuncletUnwindMap->count(MemoKey) && +             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && +             "must get memoized to avoid confusing later searches"); +#endif // NDEBUG +    } +      // Convert this function call into an invoke instruction.  First, split the      // basic block.      BasicBlock *Split = @@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,    // This connects all the instructions which 'unwind to caller' to the invoke    // destination. +  UnwindDestMemoTy FuncletUnwindMap;    for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();         BB != E; ++BB) {      if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {        if (CRI->unwindsToCaller()) { -        CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); +        auto *CleanupPad = CRI->getCleanupPad(); +        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);          CRI->eraseFromParent();          UpdatePHINodes(&*BB); +        // Finding a cleanupret with an unwind destination would confuse +        // subsequent calls to getUnwindDestToken, so map the cleanuppad +        // to short-circuit any such calls and recognize this as an "unwind +        // to caller" cleanup. 
+        assert(!FuncletUnwindMap.count(CleanupPad) || +               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); +        FuncletUnwindMap[CleanupPad] = +            ConstantTokenNone::get(Caller->getContext());        }      } @@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      Instruction *Replacement = nullptr;      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {        if (CatchSwitch->unwindsToCaller()) { +        Value *UnwindDestToken; +        if (auto *ParentPad = +                dyn_cast<Instruction>(CatchSwitch->getParentPad())) { +          // This catchswitch is nested inside another funclet.  If that +          // funclet has an unwind destination within the inlinee, then +          // unwinding out of this catchswitch would be UB.  Rewriting this +          // catchswitch to unwind to the inlined invoke's unwind dest would +          // give the parent funclet multiple unwind destinations, which is +          // something that subsequent EH table generation can't handle and +          // that the verifier rejects.  So when we see such a call, leave it +          // as "unwind to caller". +          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); +          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +            continue; +        } else { +          // This catchswitch has no parent to inherit constraints from, and +          // none of its descendants can have an unwind edge that exits it and +          // targets another funclet in the inlinee.  It may or may not have a +          // descendant that definitively has an unwind to caller.  In either +          // case, we'll have to assume that any unwinds out of it may need to +          // be routed to the caller, so treat it as though it has a definitive +          // unwind to caller. +          UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); +        }          auto *NewCatchSwitch = CatchSwitchInst::Create(              CatchSwitch->getParentPad(), UnwindDest,              CatchSwitch->getNumHandlers(), CatchSwitch->getName(),              CatchSwitch);          for (BasicBlock *PadBB : CatchSwitch->handlers())            NewCatchSwitch->addHandler(PadBB); +        // Propagate info for the old catchswitch over to the new one in +        // the unwind map.  This also serves to short-circuit any subsequent +        // checks for the unwind dest of this catchswitch, which would get +        // confused if they found the outer handler in the callee. +        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;          Replacement = NewCatchSwitch;        }      } else if (!isa<FuncletPadInst>(I)) { @@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      for (Function::iterator BB = FirstNewBlock->getIterator(),                              E = Caller->end();           BB != E; ++BB) -      if (BasicBlock *NewBB = -              HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) +      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( +              &*BB, UnwindDest, &FuncletUnwindMap))          // Update any PHI nodes in the exceptional block to indicate that there          // is now a new entry in them. 
UpdatePHINodes(NewBB); @@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } +  // If we are inlining for an invoke instruction, we must make sure to rewrite +  // any call instructions into invoke instructions.  This is sensitive to which +  // funclet pads were top-level in the inlinee, so must be done before +  // rewriting the "parent pad" links. +  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { +    BasicBlock *UnwindDest = II->getUnwindDest(); +    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); +    if (isa<LandingPadInst>(FirstNonPHI)) { +      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } else { +      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } +  } +    // Update the lexical scopes of the new funclets and callsites.    // Anything that had 'none' as its parent is now nested inside the callsite's    // EHPad. @@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } -  // If we are inlining for an invoke instruction, we must make sure to rewrite -  // any call instructions into invoke instructions. -  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { -    BasicBlock *UnwindDest = II->getUnwindDest(); -    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); -    if (isa<LandingPadInst>(FirstNonPHI)) { -      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } else { -      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } -  } -    // Handle any inlined musttail call sites.  In order for a new call site to be    // musttail, the source of the clone and the inlined call site must have been    // musttail.  Therefore it's safe to return without merging control into the diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d2793e5ecb5b..abc9b65f7a39 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -944,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {  static unsigned enforceKnownAlignment(Value *V, unsigned Align,                                        unsigned PrefAlign,                                        const DataLayout &DL) { +  assert(PrefAlign > Align); +    V = V->stripPointerCasts();    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { +    // TODO: ideally, computeKnownBits ought to have used +    // AllocaInst::getAlignment() in its computation already, making +    // the below max redundant. But, as it turns out, +    // stripPointerCasts recurses through infinite layers of bitcasts, +    // while computeKnownBits is not allowed to traverse more than 6 +    // levels. +    Align = std::max(AI->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If the preferred alignment is greater than the natural stack alignment      // then don't round up. This avoids dynamic stack realignment.      if (DL.exceedsNaturalStackAlignment(PrefAlign))        return Align; -    // If there is a requested alignment and if this is an alloca, round up. -    if (AI->getAlignment() >= PrefAlign) -      return AI->getAlignment();      AI->setAlignment(PrefAlign);      return PrefAlign;    }    if (auto *GO = dyn_cast<GlobalObject>(V)) { +    // TODO: as above, this shouldn't be necessary. +    Align = std::max(GO->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If there is a large requested alignment and we can, bump up the alignment      // of the global.  
If the memory we set aside for the global may not be the      // memory used by the final program then it is impossible for us to reliably      // enforce the preferred alignment. -    if (!GO->isStrongDefinitionForLinker()) +    if (!GO->canIncreaseAlignment())        return Align; -    if (GO->getAlignment() >= PrefAlign) -      return GO->getAlignment(); -    // We can only increase the alignment of the global if it has no alignment -    // specified or if it is not assigned a section.  If it is assigned a -    // section, the global could be densely packed with other objects in the -    // section, increasing the alignment could cause padding issues. -    if (!GO->hasSection() || GO->getAlignment() == 0) -      GO->setAlignment(PrefAlign); -    return GO->getAlignment(); +    GO->setAlignment(PrefAlign); +    return PrefAlign;    }    return Align; @@ -1585,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {    return false;  } + +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. +struct BitPart { +  BitPart(Value *P, unsigned BW) : Provider(P) { +    Provenance.resize(BW); +  } + +  /// The Value that this is a bitreverse/bswap of. +  Value *Provider; +  /// The "provenance" of each bit. Provenance[A] = B means that bit A +  /// in Provider becomes bit B in the result of this expression. +  SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + +  enum { Unset = -1 }; +}; + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we use an +/// Optional type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +/// +static const Optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, +                std::map<Value *, Optional<BitPart>> &BPS) { +  auto I = BPS.find(V); +  if (I != BPS.end()) +    return I->second; + +  auto &Result = BPS[V] = None; +  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + +  if (Instruction *I = dyn_cast<Instruction>(V)) { +    // If this is an or instruction, it may be an inner node of the bswap. 
+    if (I->getOpcode() == Instruction::Or) { +      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, +                                MatchBitReversals, BPS); +      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, +                                MatchBitReversals, BPS); +      if (!A || !B) +        return Result; + +      // Try and merge the two together. +      if (!A->Provider || A->Provider != B->Provider) +        return Result; + +      Result = BitPart(A->Provider, BitWidth); +      for (unsigned i = 0; i < A->Provenance.size(); ++i) { +        if (A->Provenance[i] != BitPart::Unset && +            B->Provenance[i] != BitPart::Unset && +            A->Provenance[i] != B->Provenance[i]) +          return Result = None; + +        if (A->Provenance[i] == BitPart::Unset) +          Result->Provenance[i] = B->Provenance[i]; +        else +          Result->Provenance[i] = A->Provenance[i]; +      } + +      return Result; +    } + +    // If this is a logical shift by a constant, recurse then shift the result. +    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { +      unsigned BitShift = +          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); +      // Ensure the shift amount is defined. +      if (BitShift > BitWidth) +        return Result; + +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      // Perform the "shift" on BitProvenance. +      auto &P = Result->Provenance; +      if (I->getOpcode() == Instruction::Shl) { +        P.erase(std::prev(P.end(), BitShift), P.end()); +        P.insert(P.begin(), BitShift, BitPart::Unset); +      } else { +        P.erase(P.begin(), std::next(P.begin(), BitShift)); +        P.insert(P.end(), BitShift, BitPart::Unset); +      } + +      return Result; +    } + +    // If this is a logical 'and' with a mask that clears bits, recurse then +    // unset the appropriate bits. +    if (I->getOpcode() == Instruction::And && +        isa<ConstantInt>(I->getOperand(1))) { +      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); +      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + +      // Check that the mask allows a multiple of 8 bits for a bswap, for an +      // early exit. +      unsigned NumMaskedBits = AndMask.countPopulation(); +      if (!MatchBitReversals && NumMaskedBits % 8 != 0) +        return Result; +       +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) +        // If the AndMask is zero for this bit, clear the bit. +        if ((AndMask & Bit) == 0) +          Result->Provenance[i] = BitPart::Unset; + +      return Result; +    } +  } + +  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be +  // the input value to the bswap/bitreverse. +  Result = BitPart(V, BitWidth); +  for (unsigned i = 0; i < BitWidth; ++i) +    Result->Provenance[i] = i; +  return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, +                                          unsigned BitWidth) { +  if (From % 8 != To % 8) +    return false; +  // Convert from bit indices to byte indices and check for a byte reversal. 
+  From >>= 3; +  To >>= 3; +  BitWidth >>= 3; +  return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, +                                               unsigned BitWidth) { +  return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBitReverseOrBSwapIdiom( +    Instruction *I, bool MatchBSwaps, bool MatchBitReversals, +    SmallVectorImpl<Instruction *> &InsertedInsts) { +  if (Operator::getOpcode(I) != Instruction::Or) +    return false; +  if (!MatchBSwaps && !MatchBitReversals) +    return false; +  IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); +  if (!ITy || ITy->getBitWidth() > 128) +    return false;   // Can't do vectors or integers > 128 bits. +  unsigned BW = ITy->getBitWidth(); + +  // Try to find all the pieces corresponding to the bswap. +  std::map<Value *, Optional<BitPart>> BPS; +  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); +  if (!Res) +    return false; +  auto &BitProvenance = Res->Provenance; + +  // Now, is the bit permutation correct for a bswap or a bitreverse? We can +  // only byteswap values with an even number of bytes. +  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; +  for (unsigned i = 0; i < BW; ++i) { +    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); +    OKForBitReverse &= +        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); +  } + +  Intrinsic::ID Intrin; +  if (OKForBSwap && MatchBSwaps) +    Intrin = Intrinsic::bswap; +  else if (OKForBitReverse && MatchBitReversals) +    Intrin = Intrinsic::bitreverse; +  else +    return false; + +  Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); +  InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); +  return true; +} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc0744060142..2f3c31128cf0 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) {    return nullptr;  } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. +static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, +                                          bool CheckDoubleTy) { +  FunctionType *FT = F->getFunctionType(); +  if (FT->getNumParams() != NumParams) +    return false; + +  // The return type must match what we're looking for. +  Type *RetTy = FT->getReturnType(); +  if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) +    return false; + +  // Each parameter must match the return type, and therefore, match every other +  // parameter too. +  for (const Type *ParamTy : FT->params()) +    if (ParamTy != RetTy) +      return false; -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, -                                                bool CheckRetType) { +  return true; +} + +/// Shrink double -> float for unary functions like 'floor'. 
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, +                                    bool CheckRetType) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || -      !FT->getParamType(0)->isDoubleTy()) +  if (!matchesFPLibFunctionSignature(Callee, 1, true))      return nullptr;    if (CheckRetType) { @@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,    return B.CreateFPExt(V, B.getDoubleTy());  } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  // Just make sure this has 2 arguments of the same FP type, which match the -  // result type. -  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || -      FT->getParamType(0) != FT->getParamType(1) || -      !FT->getParamType(0)->isFloatingPointTy()) +  if (!matchesFPLibFunctionSignature(Callee, 2, true))      return nullptr;    // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {  Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -   +    Value *Ret = nullptr;    if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||                                     Callee->getIntrinsicID() == Intrinsic::sqrt))      Ret = optimizeUnaryDoubleFP(CI, B, true); +  // FIXME: Refactor - this check is repeated all over this file and even in the +  // preceding call to shrink double -> float. + +  // Make sure this has 1 argument of FP type, which matches the result type. +  FunctionType *FT = Callee->getFunctionType(); +  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || +      !FT->getParamType(0)->isFloatingPointTy()) +    return Ret; +    if (!CI->hasUnsafeAlgebra())      return Ret;
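A note on the InstCombine change above: MatchBSwapOrBitReverse now just delegates to the new recognizeBitReverseOrBSwapIdiom helper added in Local.cpp (matching only bswaps, per the true/false arguments). For readers unfamiliar with the idiom being matched, here is a minimal sketch of the kind of source pattern involved; the function name swap32 is invented for illustration and is not part of this commit:

```cpp
#include <cstdint>

// Classic open-coded 32-bit byte swap. Clang lowers this to a tree of
// shl/lshr/and/or instructions; InstCombine's visitOr hands the outermost
// 'or' to recognizeBitReverseOrBSwapIdiom, which proves that every result
// bit comes from exactly one bit of X in byte-reversed order, and replaces
// the whole tree with a single @llvm.bswap.i32 call.
uint32_t swap32(uint32_t X) {
  return ((X & 0x000000FFu) << 24) | ((X & 0x0000FF00u) << 8) |
         ((X & 0x00FF0000u) >> 8)  | ((X & 0xFF000000u) >> 24);
}
```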
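On the InlineFunction.cpp side, the new getUnwindDestToken machinery decides whether a call nested in a funclet may safely be rewritten to an invoke during inlining. A hedged, illustrative C++ shape of the situation (the names Inlinee and MayThrow are invented; the funclet structure only materializes under funclet-based personalities such as MSVC C++ EH):

```cpp
#include <cstdio>

static void MayThrow() { throw 42; }

static void Inlinee() {
  try {
    MayThrow();
  } catch (...) {
    // Under a funclet personality this handler becomes a catchpad funclet,
    // and the call below carries a "funclet" operand bundle naming it.
    std::puts("handled inside the inlinee");
  }
}

int main() {
  try {
    // When Inlinee is inlined into this invoke, calls inside its catchpad
    // must not be blindly rewritten to invokes targeting the caller's
    // unwind dest -- that could give the funclet multiple unwind
    // destinations, which EH table generation cannot encode.
    Inlinee();
  } catch (...) {
    std::puts("handled in the caller");
  }
  return 0;
}
```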
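The core of the relocated matcher is the provenance vector built by collectBitParts: in the final check, Provenance[i] names the provider bit that produces result bit i (see the bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW) call above). The classification itself is a pure permutation test, mirrored below as a self-contained sketch; this is not LLVM code, and the names isBSwapPerm and isBitReversePerm are invented:

```cpp
#include <cstdio>
#include <vector>

// A bswap keeps each bit's position within its byte while reversing the
// bytes; a bitreverse maps result bit i to provider bit BW - i - 1.
static bool isBSwapPerm(const std::vector<int> &Prov) {
  unsigned BW = Prov.size();
  if (BW % 16 != 0) // can only byteswap an even number of bytes
    return false;
  for (unsigned To = 0; To != BW; ++To) {
    int From = Prov[To];
    if (From < 0 || From % 8 != To % 8) // bit moved within its byte
      return false;
    if (unsigned(From) / 8 != BW / 8 - To / 8 - 1) // bytes not reversed
      return false;
  }
  return true;
}

static bool isBitReversePerm(const std::vector<int> &Prov) {
  unsigned BW = Prov.size();
  for (unsigned To = 0; To != BW; ++To)
    if (Prov[To] != int(BW - To - 1))
      return false;
  return true;
}

int main() {
  // Provenance of a 32-bit bswap: the same bit within the mirrored byte.
  std::vector<int> Prov(32);
  for (int To = 0; To != 32; ++To)
    Prov[To] = To % 8 + 8 * (3 - To / 8);
  std::printf("bswap=%d bitreverse=%d\n", isBSwapPerm(Prov),
              isBitReversePerm(Prov));
  return 0;
}
```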
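Finally, for the SimplifyLibCalls.cpp change: the repeated signature checks are folded into matchesFPLibFunctionSignature, and the transform it guards is the double-to-float shrink. A small before/after sketch of what optimizeUnaryDoubleFP achieves at the source level (function names invented for illustration):

```cpp
#include <math.h>

// Before: the float is widened to double, the double-precision libcall
// runs, and the result is truncated straight back to float.
float roundDownBefore(float F) {
  return (float)floor((double)F);
}

// After the simplification: since the call's only use is an fptrunc and
// its argument is an fpext, the round trip collapses to the float variant.
float roundDownAfter(float F) {
  return floorf(F);
}
```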
