| author | Dimitry Andric <dim@FreeBSD.org> | 2016-01-22 21:16:09 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-01-22 21:16:09 +0000 | 
| commit | dadbdfff07596fc3b48cc1e735181b9b8c893f67 (patch) | |
| tree | b27dffbc94bfeb477e1ff5e484d34d409ec05813 /lib/Transforms | |
| parent | dfab1a98e00a97e03f1d59a3cbdc33ced8622a9c (diff) | |
Diffstat (limited to 'lib/Transforms')
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 187 |
| -rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 331 |
| -rw-r--r-- | lib/Transforms/Utils/Local.cpp | 235 |
| -rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 55 |
4 files changed, 583 insertions, 225 deletions
| diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 95c50d32c820..76cefd97cd8f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -17,6 +17,7 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  using namespace PatternMatch; @@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    return Changed ? &I : nullptr;  } - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// (value, bitnumber) to bitnumber. It is the caller's responsibility to -/// validate that all `value`s are identical and that the bitnumber to bitnumber -/// mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted, -/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7]. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bits that the subexpression is later shifted.  For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -16 on entry.  This is used to specify which -/// bits of BitValues are actually being set. -/// -/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding -/// bit is masked to zero by a user.  For example, in (X & 255), X will be -/// processed with a bytemask of 255. BitMask is always in the local -/// (OverallLeftShift) coordinate space. -/// -static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, -                            SmallVectorImpl<Value *> &BitValues, -                            SmallVectorImpl<int> &BitProvenance) { -  if (Instruction *I = dyn_cast<Instruction>(V)) { -    // If this is an or instruction, it may be an inner node of the bswap. -    if (I->getOpcode() == Instruction::Or) -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance) || -             CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); - -    // If this is a logical shift by a constant, recurse with OverallLeftShift -    // and BitMask adjusted. -    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { -      unsigned ShAmt = -          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); -      // Ensure the shift amount is defined. 
-      if (ShAmt > BitValues.size()) -        return true; - -      unsigned BitShift = ShAmt; -      if (I->getOpcode() == Instruction::Shl) { -        // X << C -> collect(X, +C) -        OverallLeftShift += BitShift; -        BitMask = BitMask.lshr(BitShift); -      } else { -        // X >>u C -> collect(X, -C) -        OverallLeftShift -= BitShift; -        BitMask = BitMask.shl(BitShift); -      } - -      if (OverallLeftShift >= (int)BitValues.size()) -        return true; -      if (OverallLeftShift <= -(int)BitValues.size()) -        return true; - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } - -    // If this is a logical 'and' with a mask that clears bits, clear the -    // corresponding bits in BitMask. -    if (I->getOpcode() == Instruction::And && -        isa<ConstantInt>(I->getOperand(1))) { -      unsigned NumBits = BitValues.size(); -      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); -      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - -      for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) { -        // If this bit is masked out by a later operation, we don't care what -        // the and mask is. -        if (BitMask[i] == 0) -          continue; - -        // If the AndMask is zero for this bit, clear the bit. -        APInt MaskB = AndMask & Bit; -        if (MaskB == 0) { -          BitMask.clearBit(i); -          continue; -        } - -        // Otherwise, this bit is kept. -      } - -      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, -                             BitValues, BitProvenance); -    } -  } - -  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be -  // the input value to the bswap/bitreverse. To be part of a bswap or -  // bitreverse we must be demanding a contiguous range of bits from it. -  unsigned InputBitLen = BitMask.countPopulation(); -  unsigned InputBitNo = BitMask.countTrailingZeros(); -  if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != -      InputBitLen) -    // Not a contiguous set range of bits! -    return true; - -  // We know we're moving a contiguous range of bits from the input to the -  // output. Record which bits in the output came from which bits in the input. -  unsigned DestBitNo = InputBitNo + OverallLeftShift; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitProvenance[DestBitNo + I] = InputBitNo + I; - -  // If the destination bit value is already defined, the values are or'd -  // together, which isn't a bswap/bitreverse (unless it's an or of the same -  // bits). -  if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) -    return true; -  for (unsigned I = 0; I < InputBitLen; ++I) -    BitValues[DestBitNo + I] = V; - -  return false; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, -                                          unsigned BitWidth) { -  if (From % 8 != To % 8) -    return false; -  // Convert from bit indices to byte indices and check for a byte reversal. -  From >>= 3; -  To >>= 3; -  BitWidth >>= 3; -  return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, -                                               unsigned BitWidth) { -  return From == BitWidth - To - 1; -} -  /// Given an OR instruction, check to see if this is a bswap or bitreverse  /// idiom. If so, insert the new intrinsic and return it.  
Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) { -  IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); -  if (!ITy) -    return nullptr;   // Can't do vectors. -  unsigned BW = ITy->getBitWidth(); -   -  /// We keep track of which bit (BitProvenance) inside which value (BitValues) -  /// defines each bit in the result. -  SmallVector<Value *, 8> BitValues(BW, nullptr); -  SmallVector<int, 8> BitProvenance(BW, -1); -   -  // Try to find all the pieces corresponding to the bswap. -  APInt BitMask = APInt::getAllOnesValue(BitValues.size()); -  if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance)) -    return nullptr; - -  // Check to see if all of the bits come from the same value. -  Value *V = BitValues[0]; -  if (!V) return nullptr;  // Didn't find a bit?  Must be zero. - -  if (!std::all_of(BitValues.begin(), BitValues.end(), -                   [&](const Value *X) { return X == V; })) -    return nullptr; - -  // Now, is the bit permutation correct for a bswap or a bitreverse? We can -  // only byteswap values with an even number of bytes. -  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; -  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { -    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); -    OKForBitReverse &= -        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); -  } - -  Intrinsic::ID Intrin; -  if (OKForBSwap) -    Intrin = Intrinsic::bswap; -  else if (OKForBitReverse) -    Intrin = Intrinsic::bitreverse; -  else +  SmallVector<Instruction*, 4> Insts; +  if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts))      return nullptr; +  Instruction *LastInst = Insts.pop_back_val(); +  LastInst->removeFromParent(); -  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy); -  return CallInst::Create(F, V); +  for (auto *Inst : Insts) +    Worklist.Add(Inst); +  return LastInst;  }  /// We have an expression of the form (A&C)|(B&D).  Check if A is (cond?-1:0) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 14574119b9a8..79282a2a703b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(    RI->eraseFromParent();  } +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { +  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) +    return FPI->getParentPad(); +  return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. +static Value *getUnwindDestTokenHelper(Instruction *EHPad, +                                       UnwindDestMemoTy &MemoMap) { +  SmallVector<Instruction *, 8> Worklist(1, EHPad); + +  while (!Worklist.empty()) { +    Instruction *CurrentPad = Worklist.pop_back_val(); +    // We only put pads on the worklist that aren't in the MemoMap.  When +    // we find an unwind dest for a pad we may update its ancestors, but +    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, +    // so they should never get updated while queued on the worklist. 
+    assert(!MemoMap.count(CurrentPad)); +    Value *UnwindDestToken = nullptr; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { +      if (CatchSwitch->hasUnwindDest()) { +        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); +      } else { +        // Catchswitch doesn't have a 'nounwind' variant, and one might be +        // annotated as "unwinds to caller" when really it's nounwind (see +        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the +        // parent's unwind dest from this.  We can check its catchpads' +        // descendants, since they might include a cleanuppad with an +        // "unwinds to caller" cleanupret, which can be trusted. +        for (auto HI = CatchSwitch->handler_begin(), +                  HE = CatchSwitch->handler_end(); +             HI != HE && !UnwindDestToken; ++HI) { +          BasicBlock *HandlerBlock = *HI; +          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); +          for (User *Child : CatchPad->users()) { +            // Intentionally ignore invokes here -- since the catchswitch is +            // marked "unwind to caller", it would be a verifier error if it +            // contained an invoke which unwinds out of it, so any invoke we'd +            // encounter must unwind to some child of the catch. +            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) +              continue; + +            Instruction *ChildPad = cast<Instruction>(Child); +            auto Memo = MemoMap.find(ChildPad); +            if (Memo == MemoMap.end()) { +              // Haven't figured out this child pad yet; queue it. +              Worklist.push_back(ChildPad); +              continue; +            } +            // We've already checked this child, but might have found that +            // it offers no proof either way. +            Value *ChildUnwindDestToken = Memo->second; +            if (!ChildUnwindDestToken) +              continue; +            // We already know the child's unwind dest, which can either +            // be ConstantTokenNone to indicate unwind to caller, or can +            // be another child of the catchpad.  Only the former indicates +            // the unwind dest of the catchswitch. +            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { +              UnwindDestToken = ChildUnwindDestToken; +              break; +            } +            assert(getParentPad(ChildUnwindDestToken) == CatchPad); +          } +        } +      } +    } else { +      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); +      for (User *U : CleanupPad->users()) { +        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { +          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) +            UnwindDestToken = RetUnwindDest->getFirstNonPHI(); +          else +            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); +          break; +        } +        Value *ChildUnwindDestToken; +        if (auto *Invoke = dyn_cast<InvokeInst>(U)) { +          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); +        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { +          Instruction *ChildPad = cast<Instruction>(U); +          auto Memo = MemoMap.find(ChildPad); +          if (Memo == MemoMap.end()) { +            // Haven't resolved this child yet; queue it and keep searching. 
+            Worklist.push_back(ChildPad); +            continue; +          } +          // We've checked this child, but still need to ignore it if it +          // had no proof either way. +          ChildUnwindDestToken = Memo->second; +          if (!ChildUnwindDestToken) +            continue; +        } else { +          // Not a relevant user of the cleanuppad +          continue; +        } +        // In a well-formed program, the child/invoke must either unwind to +        // an(other) child of the cleanup, or exit the cleanup.  In the +        // first case, continue searching. +        if (isa<Instruction>(ChildUnwindDestToken) && +            getParentPad(ChildUnwindDestToken) == CleanupPad) +          continue; +        UnwindDestToken = ChildUnwindDestToken; +        break; +      } +    } +    // If we haven't found an unwind dest for CurrentPad, we may have queued its +    // children, so move on to the next in the worklist. +    if (!UnwindDestToken) +      continue; + +    // Now we know that CurrentPad unwinds to UnwindDestToken.  It also exits +    // any ancestors of CurrentPad up to but not including UnwindDestToken's +    // parent pad.  Record this in the memo map, and check to see if the +    // original EHPad being queried is one of the ones exited. +    Value *UnwindParent; +    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) +      UnwindParent = getParentPad(UnwindPad); +    else +      UnwindParent = nullptr; +    bool ExitedOriginalPad = false; +    for (Instruction *ExitedPad = CurrentPad; +         ExitedPad && ExitedPad != UnwindParent; +         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { +      // Skip over catchpads since they just follow their catchswitches. +      if (isa<CatchPadInst>(ExitedPad)) +        continue; +      MemoMap[ExitedPad] = UnwindDestToken; +      ExitedOriginalPad |= (ExitedPad == EHPad); +    } + +    if (ExitedOriginalPad) +      return UnwindDestToken; + +    // Continue the search. +  } + +  // No definitive information is contained within this funclet. +  return nullptr; +} + +/// Given an EH pad, find where it unwinds.  If it unwinds to an EH pad, +/// return that pad instruction.  If it unwinds to caller, return +/// ConstantTokenNone.  If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke.  Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer.  Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees.  The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. 
+static Value *getUnwindDestToken(Instruction *EHPad, +                                 UnwindDestMemoTy &MemoMap) { +  // Catchpads unwind to the same place as their catchswitch; +  // redirect any queries on catchpads so the code below can +  // deal with just catchswitches and cleanuppads. +  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) +    EHPad = CPI->getCatchSwitch(); + +  // Check if we've already determined the unwind dest for this pad. +  auto Memo = MemoMap.find(EHPad); +  if (Memo != MemoMap.end()) +    return Memo->second; + +  // Search EHPad and, if necessary, its descendants. +  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); +  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); +  if (UnwindDestToken) +    return UnwindDestToken; + +  // No information is available for this EHPad from itself or any of its +  // descendants.  An unwind all the way out to a pad in the caller would +  // need also to agree with the unwind dest of the parent funclet, so +  // search up the chain to try to find a funclet with information.  Put +  // null entries in the memo map to avoid re-processing as we go up. +  MemoMap[EHPad] = nullptr; +  Instruction *LastUselessPad = EHPad; +  Value *AncestorToken; +  for (AncestorToken = getParentPad(EHPad); +       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); +       AncestorToken = getParentPad(AncestorToken)) { +    // Skip over catchpads since they just follow their catchswitches. +    if (isa<CatchPadInst>(AncestorPad)) +      continue; +    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); +    auto AncestorMemo = MemoMap.find(AncestorPad); +    if (AncestorMemo == MemoMap.end()) { +      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); +    } else { +      UnwindDestToken = AncestorMemo->second; +    } +    if (UnwindDestToken) +      break; +    LastUselessPad = AncestorPad; +  } + +  // Since the whole tree under LastUselessPad has no information, it all must +  // match UnwindDestToken; record that to avoid repeating the search. +  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); +  while (!Worklist.empty()) { +    Instruction *UselessPad = Worklist.pop_back_val(); +    assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr); +    MemoMap[UselessPad] = UnwindDestToken; +    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { +      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) +        for (User *U : HandlerBlock->getFirstNonPHI()->users()) +          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +            Worklist.push_back(cast<Instruction>(U)); +    } else { +      assert(isa<CleanupPadInst>(UselessPad)); +      for (User *U : UselessPad->users()) +        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) +          Worklist.push_back(cast<Instruction>(U)); +    } +  } + +  return UnwindDestToken; +} +  /// When we inline a basic block into an invoke,  /// we have to turn all of the calls that can throw into invokes.  /// This function analyzes BB to see if there are any calls, and if so,  /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI  /// nodes in that block with the values specified in InvokeDestPHIValues. 
-static BasicBlock * -HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { +static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( +    BasicBlock *BB, BasicBlock *UnwindEdge, +    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {      Instruction *I = &*BBI++; @@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {      if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))        continue; +    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { +      // This call is nested inside a funclet.  If that funclet has an unwind +      // destination within the inlinee, then unwinding out of this call would +      // be UB.  Rewriting this call to an invoke which targets the inlined +      // invoke's unwind dest would give the call's parent funclet multiple +      // unwind destinations, which is something that subsequent EH table +      // generation can't handle and that the verifier rejects.  So when we +      // see such a call, leave it as a call. +      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); +      Value *UnwindDestToken = +          getUnwindDestToken(FuncletPad, *FuncletUnwindMap); +      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +        continue; +#ifndef NDEBUG +      Instruction *MemoKey; +      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) +        MemoKey = CatchPad->getCatchSwitch(); +      else +        MemoKey = FuncletPad; +      assert(FuncletUnwindMap->count(MemoKey) && +             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && +             "must get memoized to avoid confusing later searches"); +#endif // NDEBUG +    } +      // Convert this function call into an invoke instruction.  First, split the      // basic block.      BasicBlock *Split = @@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,    // This connects all the instructions which 'unwind to caller' to the invoke    // destination. +  UnwindDestMemoTy FuncletUnwindMap;    for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();         BB != E; ++BB) {      if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {        if (CRI->unwindsToCaller()) { -        CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); +        auto *CleanupPad = CRI->getCleanupPad(); +        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);          CRI->eraseFromParent();          UpdatePHINodes(&*BB); +        // Finding a cleanupret with an unwind destination would confuse +        // subsequent calls to getUnwindDestToken, so map the cleanuppad +        // to short-circuit any such calls and recognize this as an "unwind +        // to caller" cleanup. 
+        assert(!FuncletUnwindMap.count(CleanupPad) || +               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); +        FuncletUnwindMap[CleanupPad] = +            ConstantTokenNone::get(Caller->getContext());        }      } @@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      Instruction *Replacement = nullptr;      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {        if (CatchSwitch->unwindsToCaller()) { +        Value *UnwindDestToken; +        if (auto *ParentPad = +                dyn_cast<Instruction>(CatchSwitch->getParentPad())) { +          // This catchswitch is nested inside another funclet.  If that +          // funclet has an unwind destination within the inlinee, then +          // unwinding out of this catchswitch would be UB.  Rewriting this +          // catchswitch to unwind to the inlined invoke's unwind dest would +          // give the parent funclet multiple unwind destinations, which is +          // something that subsequent EH table generation can't handle and +          // that the verifier rejects.  So when we see such a call, leave it +          // as "unwind to caller". +          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); +          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) +            continue; +        } else { +          // This catchswitch has no parent to inherit constraints from, and +          // none of its descendants can have an unwind edge that exits it and +          // targets another funclet in the inlinee.  It may or may not have a +          // descendant that definitively has an unwind to caller.  In either +          // case, we'll have to assume that any unwinds out of it may need to +          // be routed to the caller, so treat it as though it has a definitive +          // unwind to caller. +          UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); +        }          auto *NewCatchSwitch = CatchSwitchInst::Create(              CatchSwitch->getParentPad(), UnwindDest,              CatchSwitch->getNumHandlers(), CatchSwitch->getName(),              CatchSwitch);          for (BasicBlock *PadBB : CatchSwitch->handlers())            NewCatchSwitch->addHandler(PadBB); +        // Propagate info for the old catchswitch over to the new one in +        // the unwind map.  This also serves to short-circuit any subsequent +        // checks for the unwind dest of this catchswitch, which would get +        // confused if they found the outer handler in the callee. +        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;          Replacement = NewCatchSwitch;        }      } else if (!isa<FuncletPadInst>(I)) { @@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,      for (Function::iterator BB = FirstNewBlock->getIterator(),                              E = Caller->end();           BB != E; ++BB) -      if (BasicBlock *NewBB = -              HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) +      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( +              &*BB, UnwindDest, &FuncletUnwindMap))          // Update any PHI nodes in the exceptional block to indicate that there          // is now a new entry in them. 
UpdatePHINodes(NewBB); @@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } +  // If we are inlining for an invoke instruction, we must make sure to rewrite +  // any call instructions into invoke instructions.  This is sensitive to which +  // funclet pads were top-level in the inlinee, so must be done before +  // rewriting the "parent pad" links. +  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { +    BasicBlock *UnwindDest = II->getUnwindDest(); +    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); +    if (isa<LandingPadInst>(FirstNonPHI)) { +      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } else { +      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); +    } +  } +    // Update the lexical scopes of the new funclets and callsites.    // Anything that had 'none' as its parent is now nested inside the callsite's    // EHPad. @@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      }    } -  // If we are inlining for an invoke instruction, we must make sure to rewrite -  // any call instructions into invoke instructions. -  if (auto *II = dyn_cast<InvokeInst>(TheCall)) { -    BasicBlock *UnwindDest = II->getUnwindDest(); -    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); -    if (isa<LandingPadInst>(FirstNonPHI)) { -      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } else { -      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); -    } -  } -    // Handle any inlined musttail call sites.  In order for a new call site to be    // musttail, the source of the clone and the inlined call site must have been    // musttail.  Therefore it's safe to return without merging control into the diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d2793e5ecb5b..abc9b65f7a39 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -944,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {  static unsigned enforceKnownAlignment(Value *V, unsigned Align,                                        unsigned PrefAlign,                                        const DataLayout &DL) { +  assert(PrefAlign > Align); +    V = V->stripPointerCasts();    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { +    // TODO: ideally, computeKnownBits ought to have used +    // AllocaInst::getAlignment() in its computation already, making +    // the below max redundant. But, as it turns out, +    // stripPointerCasts recurses through infinite layers of bitcasts, +    // while computeKnownBits is not allowed to traverse more than 6 +    // levels. +    Align = std::max(AI->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If the preferred alignment is greater than the natural stack alignment      // then don't round up. This avoids dynamic stack realignment.      if (DL.exceedsNaturalStackAlignment(PrefAlign))        return Align; -    // If there is a requested alignment and if this is an alloca, round up. -    if (AI->getAlignment() >= PrefAlign) -      return AI->getAlignment();      AI->setAlignment(PrefAlign);      return PrefAlign;    }    if (auto *GO = dyn_cast<GlobalObject>(V)) { +    // TODO: as above, this shouldn't be necessary. +    Align = std::max(GO->getAlignment(), Align); +    if (PrefAlign <= Align) +      return Align; +      // If there is a large requested alignment and we can, bump up the alignment      // of the global.  
If the memory we set aside for the global may not be the      // memory used by the final program then it is impossible for us to reliably      // enforce the preferred alignment. -    if (!GO->isStrongDefinitionForLinker()) +    if (!GO->canIncreaseAlignment())        return Align; -    if (GO->getAlignment() >= PrefAlign) -      return GO->getAlignment(); -    // We can only increase the alignment of the global if it has no alignment -    // specified or if it is not assigned a section.  If it is assigned a -    // section, the global could be densely packed with other objects in the -    // section, increasing the alignment could cause padding issues. -    if (!GO->hasSection() || GO->getAlignment() == 0) -      GO->setAlignment(PrefAlign); -    return GO->getAlignment(); +    GO->setAlignment(PrefAlign); +    return PrefAlign;    }    return Align; @@ -1585,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {    return false;  } + +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. +struct BitPart { +  BitPart(Value *P, unsigned BW) : Provider(P) { +    Provenance.resize(BW); +  } + +  /// The Value that this is a bitreverse/bswap of. +  Value *Provider; +  /// The "provenance" of each bit. Provenance[A] = B means that bit A +  /// in Provider becomes bit B in the result of this expression. +  SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + +  enum { Unset = -1 }; +}; + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we use an +/// Optional type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +/// +static const Optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, +                std::map<Value *, Optional<BitPart>> &BPS) { +  auto I = BPS.find(V); +  if (I != BPS.end()) +    return I->second; + +  auto &Result = BPS[V] = None; +  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + +  if (Instruction *I = dyn_cast<Instruction>(V)) { +    // If this is an or instruction, it may be an inner node of the bswap. 
+    if (I->getOpcode() == Instruction::Or) { +      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, +                                MatchBitReversals, BPS); +      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, +                                MatchBitReversals, BPS); +      if (!A || !B) +        return Result; + +      // Try and merge the two together. +      if (!A->Provider || A->Provider != B->Provider) +        return Result; + +      Result = BitPart(A->Provider, BitWidth); +      for (unsigned i = 0; i < A->Provenance.size(); ++i) { +        if (A->Provenance[i] != BitPart::Unset && +            B->Provenance[i] != BitPart::Unset && +            A->Provenance[i] != B->Provenance[i]) +          return Result = None; + +        if (A->Provenance[i] == BitPart::Unset) +          Result->Provenance[i] = B->Provenance[i]; +        else +          Result->Provenance[i] = A->Provenance[i]; +      } + +      return Result; +    } + +    // If this is a logical shift by a constant, recurse then shift the result. +    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { +      unsigned BitShift = +          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); +      // Ensure the shift amount is defined. +      if (BitShift > BitWidth) +        return Result; + +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      // Perform the "shift" on BitProvenance. +      auto &P = Result->Provenance; +      if (I->getOpcode() == Instruction::Shl) { +        P.erase(std::prev(P.end(), BitShift), P.end()); +        P.insert(P.begin(), BitShift, BitPart::Unset); +      } else { +        P.erase(P.begin(), std::next(P.begin(), BitShift)); +        P.insert(P.end(), BitShift, BitPart::Unset); +      } + +      return Result; +    } + +    // If this is a logical 'and' with a mask that clears bits, recurse then +    // unset the appropriate bits. +    if (I->getOpcode() == Instruction::And && +        isa<ConstantInt>(I->getOperand(1))) { +      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); +      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + +      // Check that the mask allows a multiple of 8 bits for a bswap, for an +      // early exit. +      unsigned NumMaskedBits = AndMask.countPopulation(); +      if (!MatchBitReversals && NumMaskedBits % 8 != 0) +        return Result; +       +      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, +                                  MatchBitReversals, BPS); +      if (!Res) +        return Result; +      Result = Res; + +      for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) +        // If the AndMask is zero for this bit, clear the bit. +        if ((AndMask & Bit) == 0) +          Result->Provenance[i] = BitPart::Unset; + +      return Result; +    } +  } + +  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be +  // the input value to the bswap/bitreverse. +  Result = BitPart(V, BitWidth); +  for (unsigned i = 0; i < BitWidth; ++i) +    Result->Provenance[i] = i; +  return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, +                                          unsigned BitWidth) { +  if (From % 8 != To % 8) +    return false; +  // Convert from bit indices to byte indices and check for a byte reversal. 
+  From >>= 3; +  To >>= 3; +  BitWidth >>= 3; +  return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, +                                               unsigned BitWidth) { +  return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBitReverseOrBSwapIdiom( +    Instruction *I, bool MatchBSwaps, bool MatchBitReversals, +    SmallVectorImpl<Instruction *> &InsertedInsts) { +  if (Operator::getOpcode(I) != Instruction::Or) +    return false; +  if (!MatchBSwaps && !MatchBitReversals) +    return false; +  IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); +  if (!ITy || ITy->getBitWidth() > 128) +    return false;   // Can't do vectors or integers > 128 bits. +  unsigned BW = ITy->getBitWidth(); + +  // Try to find all the pieces corresponding to the bswap. +  std::map<Value *, Optional<BitPart>> BPS; +  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); +  if (!Res) +    return false; +  auto &BitProvenance = Res->Provenance; + +  // Now, is the bit permutation correct for a bswap or a bitreverse? We can +  // only byteswap values with an even number of bytes. +  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; +  for (unsigned i = 0; i < BW; ++i) { +    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); +    OKForBitReverse &= +        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); +  } + +  Intrinsic::ID Intrin; +  if (OKForBSwap && MatchBSwaps) +    Intrin = Intrinsic::bswap; +  else if (OKForBitReverse && MatchBitReversals) +    Intrin = Intrinsic::bitreverse; +  else +    return false; + +  Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); +  InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); +  return true; +} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc0744060142..2f3c31128cf0 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) {    return nullptr;  } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. +static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, +                                          bool CheckDoubleTy) { +  FunctionType *FT = F->getFunctionType(); +  if (FT->getNumParams() != NumParams) +    return false; + +  // The return type must match what we're looking for. +  Type *RetTy = FT->getReturnType(); +  if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) +    return false; + +  // Each parameter must match the return type, and therefore, match every other +  // parameter too. +  for (const Type *ParamTy : FT->params()) +    if (ParamTy != RetTy) +      return false; -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, -                                                bool CheckRetType) { +  return true; +} + +/// Shrink double -> float for unary functions like 'floor'. 
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, +                                    bool CheckRetType) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || -      !FT->getParamType(0)->isDoubleTy()) +  if (!matchesFPLibFunctionSignature(Callee, 1, true))      return nullptr;    if (CheckRetType) { @@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,    return B.CreateFPExt(V, B.getDoubleTy());  } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -  FunctionType *FT = Callee->getFunctionType(); -  // Just make sure this has 2 arguments of the same FP type, which match the -  // result type. -  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || -      FT->getParamType(0) != FT->getParamType(1) || -      !FT->getParamType(0)->isFloatingPointTy()) +  if (!matchesFPLibFunctionSignature(Callee, 2, true))      return nullptr;    // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {  Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {    Function *Callee = CI->getCalledFunction(); -   +    Value *Ret = nullptr;    if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||                                     Callee->getIntrinsicID() == Intrinsic::sqrt))      Ret = optimizeUnaryDoubleFP(CI, B, true); +  // FIXME: Refactor - this check is repeated all over this file and even in the +  // preceding call to shrink double -> float. + +  // Make sure this has 1 argument of FP type, which matches the result type. +  FunctionType *FT = Callee->getFunctionType(); +  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || +      !FT->getParamType(0)->isFloatingPointTy()) +    return Ret; +    if (!CI->hasUnsafeAlgebra())      return Ret;
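A note on the InstCombine change above: MatchBSwapOrBitReverse now just delegates to the new recognizeBitReverseOrBSwapIdiom helper added in Local.cpp (matching only bswaps, per the true/false arguments). For readers unfamiliar with the idiom being matched, here is a minimal sketch of the kind of source pattern involved; the function name swap32 is invented for illustration and is not part of this commit:

```cpp
#include <cstdint>

// Classic open-coded 32-bit byte swap. Clang lowers this to a tree of
// shl/lshr/and/or instructions; InstCombine's visitOr hands the outermost
// 'or' to recognizeBitReverseOrBSwapIdiom, which proves that every result
// bit comes from exactly one bit of X in byte-reversed order, and replaces
// the whole tree with a single @llvm.bswap.i32 call.
uint32_t swap32(uint32_t X) {
  return ((X & 0x000000FFu) << 24) | ((X & 0x0000FF00u) << 8) |
         ((X & 0x00FF0000u) >> 8)  | ((X & 0xFF000000u) >> 24);
}
```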
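On the InlineFunction.cpp side, the new getUnwindDestToken machinery decides whether a call nested in a funclet may safely be rewritten to an invoke during inlining. A hedged, illustrative C++ shape of the situation (the names Inlinee and MayThrow are invented; the funclet structure only materializes under funclet-based personalities such as MSVC C++ EH):

```cpp
#include <cstdio>

static void MayThrow() { throw 42; }

static void Inlinee() {
  try {
    MayThrow();
  } catch (...) {
    // Under a funclet personality this handler becomes a catchpad funclet,
    // and the call below carries a "funclet" operand bundle naming it.
    std::puts("handled inside the inlinee");
  }
}

int main() {
  try {
    // When Inlinee is inlined into this invoke, calls inside its catchpad
    // must not be blindly rewritten to invokes targeting the caller's
    // unwind dest -- that could give the funclet multiple unwind
    // destinations, which EH table generation cannot encode.
    Inlinee();
  } catch (...) {
    std::puts("handled in the caller");
  }
  return 0;
}
```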
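The core of the relocated matcher is the provenance vector built by collectBitParts: in the final check, Provenance[i] names the provider bit that produces result bit i (see the bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW) call above). The classification itself is a pure permutation test, mirrored below as a self-contained sketch; this is not LLVM code, and the names isBSwapPerm and isBitReversePerm are invented:

```cpp
#include <cstdio>
#include <vector>

// A bswap keeps each bit's position within its byte while reversing the
// bytes; a bitreverse maps result bit i to provider bit BW - i - 1.
static bool isBSwapPerm(const std::vector<int> &Prov) {
  unsigned BW = Prov.size();
  if (BW % 16 != 0) // can only byteswap an even number of bytes
    return false;
  for (unsigned To = 0; To != BW; ++To) {
    int From = Prov[To];
    if (From < 0 || From % 8 != To % 8) // bit moved within its byte
      return false;
    if (unsigned(From) / 8 != BW / 8 - To / 8 - 1) // bytes not reversed
      return false;
  }
  return true;
}

static bool isBitReversePerm(const std::vector<int> &Prov) {
  unsigned BW = Prov.size();
  for (unsigned To = 0; To != BW; ++To)
    if (Prov[To] != int(BW - To - 1))
      return false;
  return true;
}

int main() {
  // Provenance of a 32-bit bswap: the same bit within the mirrored byte.
  std::vector<int> Prov(32);
  for (int To = 0; To != 32; ++To)
    Prov[To] = To % 8 + 8 * (3 - To / 8);
  std::printf("bswap=%d bitreverse=%d\n", isBSwapPerm(Prov),
              isBitReversePerm(Prov));
  return 0;
}
```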
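Finally, for the SimplifyLibCalls.cpp change: the repeated signature checks are folded into matchesFPLibFunctionSignature, and the transform it guards is the double-to-float shrink. A small before/after sketch of what optimizeUnaryDoubleFP achieves at the source level (function names invented for illustration):

```cpp
#include <math.h>

// Before: the float is widened to double, the double-precision libcall
// runs, and the result is truncated straight back to float.
float roundDownBefore(float F) {
  return (float)floor((double)F);
}

// After the simplification: since the call's only use is an fptrunc and
// its argument is an fpext, the round trip collapses to the float variant.
float roundDownAfter(float F) {
  return floorf(F);
}
```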
