diff options
Diffstat (limited to 'lib/Transforms/Utils')
| -rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp | 20 | ||||
| -rw-r--r-- | lib/Transforms/Utils/CmpInstAnalysis.cpp | 2 | ||||
| -rw-r--r-- | lib/Transforms/Utils/CodeExtractor.cpp | 6 | ||||
| -rw-r--r-- | lib/Transforms/Utils/Evaluator.cpp | 2 | ||||
| -rw-r--r-- | lib/Transforms/Utils/FunctionComparator.cpp | 18 | ||||
| -rw-r--r-- | lib/Transforms/Utils/Local.cpp | 18 | ||||
| -rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 143 | ||||
| -rw-r--r-- | lib/Transforms/Utils/LowerMemIntrinsics.cpp | 288 | ||||
| -rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 4 | ||||
| -rw-r--r-- | lib/Transforms/Utils/SimplifyIndVar.cpp | 47 | ||||
| -rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 4 | ||||
| -rw-r--r-- | lib/Transforms/Utils/VNCoercion.cpp | 15 | 
12 files changed, 469 insertions, 98 deletions
| diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 314c990293cc5..7e75e88477852 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -46,13 +46,21 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,    if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);    bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; -   +  Module *TheModule = F ? F->getParent() : nullptr; +    // Loop over all instructions, and copy them over.    for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();         II != IE; ++II) { -    if (DIFinder && F->getParent() && II->getDebugLoc()) -      DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); +    if (DIFinder && TheModule) { +      if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) +        DIFinder->processDeclare(*TheModule, DDI); +      else if (auto *DVI = dyn_cast<DbgValueInst>(II)) +        DIFinder->processValue(*TheModule, DVI); + +      if (auto DbgLoc = II->getDebugLoc()) +        DIFinder->processLocation(*TheModule, DbgLoc.get()); +    }      Instruction *NewInst = II->clone();      if (II->hasName()) @@ -153,6 +161,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,    // When we remap instructions, we want to avoid duplicating inlined    // DISubprograms, so record all subprograms we find as we duplicate    // instructions and then freeze them in the MD map. +  // We also record information about dbg.value and dbg.declare to avoid +  // duplicating the types.    
DebugInfoFinder DIFinder;    // Loop over all of the basic blocks in the function, cloning them as @@ -193,6 +203,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,      }    } +  for (auto *Type : DIFinder.types()) { +    VMap.MD()[Type].reset(Type); +  } +    // Loop over all of the instructions in the function, fixing up operand    // references as we go.  This uses VMap to do all the hard work.    for (Function::iterator BB = diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9f4d9c7e39810..d9294c4993091 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,      break;    case ICmpInst::ICMP_SGT:      // X > -1 is equivalent to (X & SignMask) == 0. -    if (!C->isAllOnesValue()) +    if (!C->isMinusOne())        return false;      Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));      Pred = ICmpInst::ICMP_EQ; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 30d8856cfbef1..1189714dfab10 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1116,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() {          }      } -  //cerr << "NEW FUNCTION: " << *newFunction; -  //  verifyFunction(*newFunction); - -  //  cerr << "OLD FUNCTION: " << *oldFunction; -  //  verifyFunction(*oldFunction); -    DEBUG(if (verifyFunction(*newFunction))           report_fatal_error("verifyFunction failed!"));    return newFunction; diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index c97e544e620a9..1328f2f3ec012 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,            Value *Ptr = 
PtrArg->stripPointerCasts();            if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {              Type *ElemTy = GV->getValueType(); -            if (!Size->isAllOnesValue() && +            if (!Size->isMinusOne() &&                  Size->getValue().getLimitedValue() >=                      DL.getTypeStoreSize(ElemTy)) {                Invariants.insert(GV); diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 0457294361b56..4a2be3a531767 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -513,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,      if (int Res =              cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))        return Res; -    if (int Res = -            cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) +    if (int Res = cmpNumbers(LI->getSyncScopeID(), +                             cast<LoadInst>(R)->getSyncScopeID()))        return Res;      return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),          cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); @@ -529,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,      if (int Res =              cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))        return Res; -    return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); +    return cmpNumbers(SI->getSyncScopeID(), +                      cast<StoreInst>(R)->getSyncScopeID());    }    if (const CmpInst *CI = dyn_cast<CmpInst>(L))      return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); @@ -584,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,      if (int Res =              cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))        return Res; -    return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); +    
return cmpNumbers(FI->getSyncScopeID(), +                      cast<FenceInst>(R)->getSyncScopeID());    }    if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {      if (int Res = cmpNumbers(CXI->isVolatile(), @@ -601,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,              cmpOrderings(CXI->getFailureOrdering(),                           cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))        return Res; -    return cmpNumbers(CXI->getSynchScope(), -                      cast<AtomicCmpXchgInst>(R)->getSynchScope()); +    return cmpNumbers(CXI->getSyncScopeID(), +                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());    }    if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {      if (int Res = cmpNumbers(RMWI->getOperation(), @@ -614,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,      if (int Res = cmpOrderings(RMWI->getOrdering(),                               cast<AtomicRMWInst>(R)->getOrdering()))        return Res; -    return cmpNumbers(RMWI->getSynchScope(), -                      cast<AtomicRMWInst>(R)->getSynchScope()); +    return cmpNumbers(RMWI->getSyncScopeID(), +                      cast<AtomicRMWInst>(R)->getSyncScopeID());    }    if (const PHINode *PNL = dyn_cast<PHINode>(L)) {      const PHINode *PNR = cast<PHINode>(R); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 5127eba3f9aea..74610613001c6 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1662,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {    TI->eraseFromParent();  } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even  /// if they are in a dead cycle.  Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG.  
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {    SmallPtrSet<BasicBlock*, 16> Reachable;    bool Changed = markAliveBlocks(F, Reachable); @@ -2168,6 +2169,9 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {      return true;    case Instruction::Call:    case Instruction::Invoke: +    // Can't handle inline asm. Skip it. +    if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue())) +      return false;      // Many arithmetic intrinsics have no issue taking a      // variable, however it's hard to distingish these from      // specials such as @llvm.frameaddress that require a constant. @@ -2182,12 +2186,18 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {    case Instruction::ShuffleVector:      // Shufflevector masks are constant.      return OpIdx != 2; +  case Instruction::Switch:    case Instruction::ExtractValue: -  case Instruction::InsertValue:      // All operands apart from the first are constant.      return OpIdx == 0; +  case Instruction::InsertValue: +    // All operands apart from the first and the second are constant. +    return OpIdx < 2;    case Instruction::Alloca: -    return false; +    // Static allocas (constant size in the entry block) are handled by +    // prologue/epilogue insertion so they're free anyway. We definitely don't +    // want to make them non-constant. +    return !dyn_cast<AllocaInst>(I)->isStaticAlloca();    case Instruction::GetElementPtr:      if (OpIdx == 0)        return true; diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ad2b707e6b23..5170c68e2915a 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -65,9 +65,11 @@ static cl::opt<bool> UnrollRuntimeMultiExit(  ///   than the unroll factor.  
///  static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, -                          BasicBlock *PrologExit, BasicBlock *PreHeader, -                          BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, -                          DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { +                          BasicBlock *PrologExit, +                          BasicBlock *OriginalLoopLatchExit, +                          BasicBlock *PreHeader, BasicBlock *NewPreHeader, +                          ValueToValueMapTy &VMap, DominatorTree *DT, +                          LoopInfo *LI, bool PreserveLCSSA) {    BasicBlock *Latch = L->getLoopLatch();    assert(Latch && "Loop must have a latch");    BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); @@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,    // then (BECount + 1) cannot unsigned-overflow.    Value *BrLoopExit =        B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); -  BasicBlock *Exit = L->getUniqueExitBlock(); -  assert(Exit && "Loop must have a single exit block only");    // Split the exit to maintain loop canonicalization guarantees -  SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); -  SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, +  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); +  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,                           PreserveLCSSA);    // Add the branch to the exit block (around the unrolled loop) -  B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); +  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);    InsertPt->eraseFromParent();    if (DT) -    DT->changeImmediateDominator(Exit, PrologExit); +    DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);  }  /// Connect the unrolling epilog code to the original loop. 
@@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,      return nullptr;  } +/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits +/// is populated with all the loop exit blocks other than the LatchExit block. +static bool +canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, +                             BasicBlock *LatchExit, bool PreserveLCSSA, +                             bool UseEpilogRemainder) { + +  // Support runtime unrolling for multiple exit blocks and multiple exiting +  // blocks. +  if (!UnrollRuntimeMultiExit) +    return false; +  // Even if runtime multi exit is enabled, we currently have some correctness +  // constrains in unrolling a multi-exit loop. +  // We rely on LCSSA form being preserved when the exit blocks are transformed. +  if (!PreserveLCSSA) +    return false; +  SmallVector<BasicBlock *, 4> Exits; +  L->getUniqueExitBlocks(Exits); +  for (auto *BB : Exits) +    if (BB != LatchExit) +      OtherExits.push_back(BB); + +  // TODO: Support multiple exiting blocks jumping to the `LatchExit` when +  // UnrollRuntimeMultiExit is true. This will need updating the logic in +  // connectEpilog/connectProlog. +  if (!LatchExit->getSinglePredecessor()) { +    DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " +                    "predecessor.\n"); +    return false; +  } +  // FIXME: We bail out of multi-exit unrolling when epilog loop is generated +  // and L is an inner loop. This is because in presence of multiple exits, the +  // outer loop is incorrect: we do not add the EpilogPreheader and exit to the +  // outer loop. This is automatically handled in the prolog case, so we do not +  // have that bug in prolog generation. +  if (UseEpilogRemainder && L->getParentLoop()) +    return false; + +  // All constraints have been satisfied. 
+  return true; +} + + +  /// Insert code in the prolog/epilog code when unrolling a loop with a  /// run-time trip-count.  /// @@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,                                        bool UseEpilogRemainder,                                        LoopInfo *LI, ScalarEvolution *SE,                                        DominatorTree *DT, bool PreserveLCSSA) { -  // for now, only unroll loops that contain a single exit -  if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) -    return false; +  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); +  DEBUG(L->dump());    // Make sure the loop is in canonical form. -  if (!L->isLoopSimplifyForm()) +  if (!L->isLoopSimplifyForm()) { +    DEBUG(dbgs() << "Not in simplify form!\n");      return false; +  }    // Guaranteed by LoopSimplifyForm.    BasicBlock *Latch = L->getLoopLatch();    BasicBlock *Header = L->getHeader(); -  BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop -  if (!LatchExit && !UnrollRuntimeMultiExit) -    return false; -  // These are exit blocks other than the target of the latch exiting block. -  SmallVector<BasicBlock *, 4> OtherExits;    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); -  unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; +  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; +  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);    // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the    // targets of the Latch be an exit block out of the loop. This needs    // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. -  assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && +  assert(!L->contains(LatchExit) &&           "one of the loop latch successors should be the exit block!"); -  // Support runtime unrolling for multiple exit blocks and multiple exiting -  // blocks. 
-  if (!LatchExit) { -    assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " -                                 "unrolling with epilog remainder only!"); -    LatchExit = LatchBR->getSuccessor(ExitIndex); -    // We rely on LCSSA form being preserved when the exit blocks are -    // transformed. -    if (!PreserveLCSSA) -      return false; -    // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This -    // will need updating the logic in connectEpilog. -    if (!LatchExit->getSinglePredecessor()) -        return false; -    SmallVector<BasicBlock *, 4> Exits; -    L->getUniqueExitBlocks(Exits); -    for (auto *BB : Exits) -      if (BB != LatchExit) -        OtherExits.push_back(BB); +  // These are exit blocks other than the target of the latch exiting block. +  SmallVector<BasicBlock *, 4> OtherExits; +  bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( +      L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); +  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. +  if (!isMultiExitUnrollingEnabled && +      (!L->getExitingBlock() || OtherExits.size())) { +    DEBUG( +        dbgs() +        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " +           "enabled!\n"); +    return false;    } - -  assert(LatchExit && "Latch Exit should exist!"); -    // Use Scalar Evolution to compute the trip count. This allows more loops to    // be unrolled than relying on induction var simplification.    if (!SE) @@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    // exiting blocks).    
const SCEV *BECountSC = SE->getExitCount(L, Latch);    if (isa<SCEVCouldNotCompute>(BECountSC) || -      !BECountSC->getType()->isIntegerTy()) +      !BECountSC->getType()->isIntegerTy()) { +    DEBUG(dbgs() << "Could not compute exit block SCEV\n");      return false; +  }    unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();    // Add 1 since the backedge count doesn't include the first loop iteration.    const SCEV *TripCountSC =        SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); -  if (isa<SCEVCouldNotCompute>(TripCountSC)) +  if (isa<SCEVCouldNotCompute>(TripCountSC)) { +    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");      return false; +  }    BasicBlock *PreHeader = L->getLoopPreheader();    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());    const DataLayout &DL = Header->getModule()->getDataLayout();    SCEVExpander Expander(*SE, DL, "loop-unroll");    if (!AllowExpensiveTripCount && -      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) +      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { +    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");      return false; +  }    // This constraint lets us deal with an overflowing trip count easily; see the    // comment on ModVal below. -  if (Log2_32(Count) > BEWidth) +  if (Log2_32(Count) > BEWidth) { +    DEBUG(dbgs() +          << "Count failed constraint on overflow trip count calculation.\n");      return false; +  }    // Loop structure is the following:    // @@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,       // node.       
for (unsigned i =0; i < oldNumOperands; i++){         Value *newVal = VMap[Phi->getIncomingValue(i)]; -       if (!newVal) { -         assert(isa<Constant>(Phi->getIncomingValue(i)) && -                "VMap should exist for all values except constants!"); +       // newVal can be a constant or derived from values outside the loop, and +       // hence need not have a VMap value. +       if (!newVal)           newVal = Phi->getIncomingValue(i); -       }         Phi->addIncoming(newVal,                             cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));       } @@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    } else {      // Connect the prolog code to the original loop and update the      // PHI functions. -    ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, -                  VMap, DT, LI, PreserveLCSSA); +    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, +                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);    }    // If this loop is nested, then the loop unroller changes the code in the diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 1c2a60a6b8b24..900450b400612 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,12 +8,256 @@  //===----------------------------------------------------------------------===//  #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  using namespace llvm; +static unsigned getLoopOperandSizeInBytes(Type *Type) { +  if (VectorType *VTy = dyn_cast<VectorType>(Type)) { +    return VTy->getBitWidth() / 8; +  } + +  return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, +              
                       Value *DstAddr, ConstantInt *CopyLen, +                                     unsigned SrcAlign, unsigned DestAlign, +                                     bool SrcIsVolatile, bool DstIsVolatile, +                                     const TargetTransformInfo &TTI) { +  // No need to expand zero length copies. +  if (CopyLen->isZero()) +    return; + +  BasicBlock *PreLoopBB = InsertBefore->getParent(); +  BasicBlock *PostLoopBB = nullptr; +  Function *ParentFunc = PreLoopBB->getParent(); +  LLVMContext &Ctx = PreLoopBB->getContext(); + +  Type *TypeOfCopyLen = CopyLen->getType(); +  Type *LoopOpType = +      TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + +  unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); +  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + +  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); +  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + +  if (LoopEndCount != 0) { +    // Split +    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); +    BasicBlock *LoopBB = +        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); +    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + +    IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + +    // Cast the Src and Dst pointers to pointers to the loop operand type (if +    // needed). 
+    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); +    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); +    if (SrcAddr->getType() != SrcOpType) { +      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); +    } +    if (DstAddr->getType() != DstOpType) { +      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); +    } + +    IRBuilder<> LoopBuilder(LoopBB); +    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); +    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); +    // Loop Body +    Value *SrcGEP = +        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); +    Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); +    Value *DstGEP = +        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); +    LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + +    Value *NewIndex = +        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); +    LoopIndex->addIncoming(NewIndex, LoopBB); + +    // Create the loop branch condition. +    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); +    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), +                             LoopBB, PostLoopBB); +  } + +  uint64_t BytesCopied = LoopEndCount * LoopOpSize; +  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; +  if (RemainingBytes) { +    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() +                                    : InsertBefore); + +    // Update the alignment based on the copy size used in the loop body. 
+    SrcAlign = std::min(SrcAlign, LoopOpSize); +    DestAlign = std::min(DestAlign, LoopOpSize); + +    SmallVector<Type *, 5> RemainingOps; +    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, +                                          SrcAlign, DestAlign); + +    for (auto OpTy : RemainingOps) { +      // Calaculate the new index +      unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); +      uint64_t GepIndex = BytesCopied / OperandSize; +      assert(GepIndex * OperandSize == BytesCopied && +             "Division should have no Remainder!"); +      // Cast source to operand type and load +      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); +      Value *CastedSrc = SrcAddr->getType() == SrcPtrType +                             ? SrcAddr +                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); +      Value *SrcGEP = RBuilder.CreateInBoundsGEP( +          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); +      Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + +      // Cast destination to operand type and store. +      PointerType *DstPtrType = PointerType::get(OpTy, DstAS); +      Value *CastedDst = DstAddr->getType() == DstPtrType +                             ? 
DstAddr +                             : RBuilder.CreateBitCast(DstAddr, DstPtrType); +      Value *DstGEP = RBuilder.CreateInBoundsGEP( +          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); +      RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + +      BytesCopied += OperandSize; +    } +  } +  assert(BytesCopied == CopyLen->getZExtValue() && +         "Bytes copied should match size in the call!"); +} + +void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, +                                       Value *SrcAddr, Value *DstAddr, +                                       Value *CopyLen, unsigned SrcAlign, +                                       unsigned DestAlign, bool SrcIsVolatile, +                                       bool DstIsVolatile, +                                       const TargetTransformInfo &TTI) { +  BasicBlock *PreLoopBB = InsertBefore->getParent(); +  BasicBlock *PostLoopBB = +      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); + +  Function *ParentFunc = PreLoopBB->getParent(); +  LLVMContext &Ctx = PreLoopBB->getContext(); + +  Type *LoopOpType = +      TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); +  unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + +  IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + +  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); +  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); +  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); +  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); +  if (SrcAddr->getType() != SrcOpType) { +    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); +  } +  if (DstAddr->getType() != DstOpType) { +    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); +  } + +  // Calculate the loop trip count, and remaining bytes to copy after the loop. 
+  Type *CopyLenType = CopyLen->getType(); +  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); +  assert(ILengthType && +         "expected size argument to memcpy to be an integer type!"); +  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); +  Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); +  Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); +  Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + +  BasicBlock *LoopBB = +      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr); +  IRBuilder<> LoopBuilder(LoopBB); + +  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); +  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); + +  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); +  Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); +  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); +  LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + +  Value *NewIndex = +      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); +  LoopIndex->addIncoming(NewIndex, LoopBB); + +  Type *Int8Type = Type::getInt8Ty(Ctx); +  if (LoopOpType != Int8Type) { +    // Loop body for the residual copy. +    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", +                                               PreLoopBB->getParent(), nullptr); +    // Residual loop header. +    BasicBlock *ResHeaderBB = BasicBlock::Create( +        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); + +    // Need to update the pre-loop basic block to branch to the correct place. 
+    // branch to the main loop if the count is non-zero, branch to the residual +    // loop if the copy size is smaller then 1 iteration of the main loop but +    // non-zero and finally branch to after the residual loop if the memcpy +    //  size is zero. +    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); +    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), +                           LoopBB, ResHeaderBB); +    PreLoopBB->getTerminator()->eraseFromParent(); + +    LoopBuilder.CreateCondBr( +        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, +        ResHeaderBB); + +    // Determine if we need to branch to the residual loop or bypass it. +    IRBuilder<> RHBuilder(ResHeaderBB); +    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), +                           ResLoopBB, PostLoopBB); + +    // Copy the residual with single byte load/store loop. +    IRBuilder<> ResBuilder(ResLoopBB); +    PHINode *ResidualIndex = +        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); +    ResidualIndex->addIncoming(Zero, ResHeaderBB); + +    Value *SrcAsInt8 = +        ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); +    Value *DstAsInt8 = +        ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); +    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); +    Value *SrcGEP = +        ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); +    Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); +    Value *DstGEP = +        ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); +    ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + +    Value *ResNewIndex = +        ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); +    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); + +    // Create the loop branch condition. 
+    ResBuilder.CreateCondBr( +        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, +        PostLoopBB); +  } else { +    // In this case the loop operand type was a byte, and there is no need for a +    // residual loop to copy the remaining memory after the main loop. +    // We do however need to patch up the control flow by creating the +    // terminators for the preloop block and the memcpy loop. +    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); +    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), +                           LoopBB, PostLoopBB); +    PreLoopBB->getTerminator()->eraseFromParent(); +    LoopBuilder.CreateCondBr( +        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, +        PostLoopBB); +  } +} +  void llvm::createMemCpyLoop(Instruction *InsertBefore,                              Value *SrcAddr, Value *DstAddr, Value *CopyLen,                              unsigned SrcAlign, unsigned DestAlign, @@ -208,15 +452,41 @@ static void createMemSetLoop(Instruction *InsertBefore,                             NewBB);  } -void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) { -  createMemCpyLoop(/* InsertBefore */ Memcpy, -                   /* SrcAddr */ Memcpy->getRawSource(), -                   /* DstAddr */ Memcpy->getRawDest(), -                   /* CopyLen */ Memcpy->getLength(), -                   /* SrcAlign */ Memcpy->getAlignment(), -                   /* DestAlign */ Memcpy->getAlignment(), -                   /* SrcIsVolatile */ Memcpy->isVolatile(), -                   /* DstIsVolatile */ Memcpy->isVolatile()); +void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, +                              const TargetTransformInfo &TTI) { +  // Original implementation +  if (!TTI.useWideIRMemcpyLoopLowering()) { +    createMemCpyLoop(/* InsertBefore */ Memcpy, +                     /* SrcAddr */ Memcpy->getRawSource(), +                     /* DstAddr */ Memcpy->getRawDest(), +         
            /* CopyLen */ Memcpy->getLength(), +                     /* SrcAlign */ Memcpy->getAlignment(), +                     /* DestAlign */ Memcpy->getAlignment(), +                     /* SrcIsVolatile */ Memcpy->isVolatile(), +                     /* DstIsVolatile */ Memcpy->isVolatile()); +  } else { +    if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { +      createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, +                                /* SrcAddr */ Memcpy->getRawSource(), +                                /* DstAddr */ Memcpy->getRawDest(), +                                /* CopyLen */ CI, +                                /* SrcAlign */ Memcpy->getAlignment(), +                                /* DestAlign */ Memcpy->getAlignment(), +                                /* SrcIsVolatile */ Memcpy->isVolatile(), +                                /* DstIsVolatile */ Memcpy->isVolatile(), +                                /* TargetTransformInfo */ TTI); +    } else { +      createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, +                                  /* SrcAddr */ Memcpy->getRawSource(), +                                  /* DstAddr */ Memcpy->getRawDest(), +                                  /* CopyLen */ Memcpy->getLength(), +                                  /* SrcAlign */ Memcpy->getAlignment(), +                                  /* DestAlign */ Memcpy->getAlignment(), +                                  /* SrcIsVolatile */ Memcpy->isVolatile(), +                                  /* DstIsVolatile */ Memcpy->isVolatile(), +                                  /* TargetTransformInfo */ TTI); +    } +  }  }  void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e724b0a28c322..dee658f983932 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5754,8 +5754,8 @@ bool 
SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {    if (BasicBlock *Dom = BB->getSinglePredecessor()) {      auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());      if (PBI && PBI->isConditional() && -        PBI->getSuccessor(0) != PBI->getSuccessor(1) && -        (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) { +        PBI->getSuccessor(0) != PBI->getSuccessor(1)) { +      assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);        bool CondIsFalse = PBI->getSuccessor(1) == BB;        Optional<bool> Implication = isImpliedCondition(            PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ec8b0d426265a..6d90e6b48358a 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -25,6 +25,7 @@  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" @@ -80,6 +81,7 @@ namespace {                                bool IsSigned);      bool eliminateSDiv(BinaryOperator *SDiv);      bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); +    bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);    };  } @@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)  void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {    unsigned IVOperIdx = 0;    ICmpInst::Predicate Pred = ICmp->getPredicate(); +  ICmpInst::Predicate OriginalPred = Pred;    if (IVOperand != ICmp->getOperand(0)) {      // Swapped      assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); @@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {      ICmp->setPredicate(InvariantPredicate);   
   ICmp->setOperand(0, NewLHS);      ICmp->setOperand(1, NewRHS); +  } else if (ICmpInst::isSigned(OriginalPred) && +             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { +    // If we were unable to make anything above, all we can do is to canonicalize +    // the comparison hoping that it will open the doors for other +    // optimizations. If we find out that we compare two non-negative values, +    // we turn the instruction's predicate to its unsigned version. Note that +    // we cannot rely on Pred here unless we check if we have swapped it. +    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); +    DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n'); +    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));    } else      return; @@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,    return Changed;  } +/// Annotate the Shr in (X << IVOperand) >> C as exact using the +/// information from the IV's range. Returns true if anything changed, false +/// otherwise. +bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, +                                          Value *IVOperand) { +  using namespace llvm::PatternMatch; + +  if (BO->getOpcode() == Instruction::Shl) { +    bool Changed = false; +    ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); +    for (auto *U : BO->users()) { +      const APInt *C; +      if (match(U, +                m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || +          match(U, +                m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { +        BinaryOperator *Shr = cast<BinaryOperator>(U); +        if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { +          Shr->setIsExact(true); +          Changed = true; +        } +      } +    } +    return Changed; +  } + +  return false; +} +  /// Add all uses of Def to the current IV's worklist.  
static void pushIVUsers(    Instruction *Def, @@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {      }      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { -      if (isa<OverflowingBinaryOperator>(BO) && -          strengthenOverflowingOperation(BO, IVOperand)) { +      if ((isa<OverflowingBinaryOperator>(BO) && +           strengthenOverflowingOperation(BO, IVOperand)) || +          (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {          // re-queue uses of the now modified binary operator and fall          // through to the checks that remain.          pushIVUsers(IVOperand, Simplified, SimpleIVUsers); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b723b65f35e59..77c0a41929ac7 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -656,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {    ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));    // memchr(x, y, 0) -> null -  if (LenC && LenC->isNullValue()) +  if (LenC && LenC->isZero())      return Constant::getNullValue(CI->getType());    // From now on we need at least constant length and string. @@ -2280,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,      return true;    if (ConstantInt *ObjSizeCI =            dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { -    if (ObjSizeCI->isAllOnesValue()) +    if (ObjSizeCI->isMinusOne())        return true;      // If the object size wasn't -1 (unknown), bail out if we were asked to.      
if (OnlyLowerUnknownSize) diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 60d9ede2c4871..c3feea6a0a414 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,    // If the store and reload are the same size, we can always reuse it.    if (StoredValSize == LoadedValSize) {      // Pointer to Pointer -> use bitcast. -    if (StoredValTy->getScalarType()->isPointerTy() && -        LoadedTy->getScalarType()->isPointerTy()) { +    if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {        StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);      } else {        // Convert source pointers to integers, which can be bitcast. -      if (StoredValTy->getScalarType()->isPointerTy()) { +      if (StoredValTy->isPtrOrPtrVectorTy()) {          StoredValTy = DL.getIntPtrType(StoredValTy);          StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);        }        Type *TypeToCastTo = LoadedTy; -      if (TypeToCastTo->getScalarType()->isPointerTy()) +      if (TypeToCastTo->isPtrOrPtrVectorTy())          TypeToCastTo = DL.getIntPtrType(TypeToCastTo);        if (StoredValTy != TypeToCastTo)          StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);        // Cast to pointer if the load needs a pointer type. -      if (LoadedTy->getScalarType()->isPointerTy()) +      if (LoadedTy->isPtrOrPtrVectorTy())          StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);      } @@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,           "canCoerceMustAliasedValueToLoad fail");    // Convert source pointers to integers, which can be manipulated. 
-  if (StoredValTy->getScalarType()->isPointerTy()) { +  if (StoredValTy->isPtrOrPtrVectorTy()) {      StoredValTy = DL.getIntPtrType(StoredValTy);      StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);    } @@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,    if (LoadedTy != NewIntTy) {      // If the result is a pointer, inttoptr. -    if (LoadedTy->getScalarType()->isPointerTy()) +    if (LoadedTy->isPtrOrPtrVectorTy())        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);      else        // Otherwise, bitcast. @@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,    uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;    // Compute which bits of the stored value are being used by the load.  Convert    // to an integer type to start with. -  if (SrcVal->getType()->getScalarType()->isPointerTy()) +  if (SrcVal->getType()->isPtrOrPtrVectorTy())      SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));    if (!SrcVal->getType()->isIntegerTy())      SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); | 
