diff options
Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
| -rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 101 | 
1 files changed, 60 insertions, 41 deletions
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9c870b42a7477..3b74421a47a0a 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -25,6 +25,7 @@  #include "llvm/Analysis/MemoryDependenceAnalysis.h"  #include "llvm/Analysis/MemoryLocation.h"  #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/Argument.h"  #include "llvm/IR/BasicBlock.h" @@ -55,7 +56,6 @@  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h"  #include <algorithm>  #include <cassert>  #include <cstdint> @@ -263,7 +263,7 @@ public:    void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {      int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); -    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); +    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);    }    void addRange(int64_t Start, int64_t Size, Value *Ptr, @@ -479,10 +479,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,      AMemSet =        Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); -    DEBUG(dbgs() << "Replace stores:\n"; -          for (Instruction *SI : Range.TheStores) -            dbgs() << *SI << '\n'; -          dbgs() << "With: " << *AMemSet << '\n'); +    LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI +                                                   : Range.TheStores) dbgs() +                                              << *SI << '\n'; +               dbgs() << "With: " << *AMemSet << '\n');      if (!Range.TheStores.empty())        AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); @@ -498,16 +498,25 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,    return AMemSet;  } -static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, -                                     const LoadInst *LI) { +static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {    unsigned StoreAlign = SI->getAlignment();    if (!StoreAlign)      StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); +  return StoreAlign; +} + +static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {    unsigned LoadAlign = LI->getAlignment();    if (!LoadAlign)      LoadAlign = DL.getABITypeAlignment(LI->getType()); +  return LoadAlign; +} -  return std::min(StoreAlign, LoadAlign); +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, +                                     const LoadInst *LI) { +  unsigned StoreAlign = findStoreAlignment(DL, SI); +  unsigned LoadAlign = findLoadAlignment(DL, LI); +  return MinAlign(StoreAlign, LoadAlign);  }  // This method try to lift a store instruction before position P. @@ -522,7 +531,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,      return false;    // Keep track of the arguments of all instruction we plan to lift -  // so we can make sure to lift them as well if apropriate. +  // so we can make sure to lift them as well if appropriate.    DenseSet<Instruction*> Args;    if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))      if (Ptr->getParent() == SI->getParent()) @@ -594,7 +603,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,    // We made it, we need to lift    for (auto *I : llvm::reverse(ToLift)) { -    DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); +    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");      I->moveBefore(P);    } @@ -656,22 +665,23 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {            if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))              UseMemMove = true; -          unsigned Align = findCommonAlignment(DL, SI, LI);            uint64_t Size = DL.getTypeStoreSize(T);            IRBuilder<> Builder(P);            Instruction *M;            if (UseMemMove) -            M = Builder.CreateMemMove(SI->getPointerOperand(), -                                      LI->getPointerOperand(), Size, -                                      Align, SI->isVolatile()); +            M = Builder.CreateMemMove( +                SI->getPointerOperand(), findStoreAlignment(DL, SI), +                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, +                SI->isVolatile());            else -            M = Builder.CreateMemCpy(SI->getPointerOperand(), -                                     LI->getPointerOperand(), Size, -                                     Align, SI->isVolatile()); +            M = Builder.CreateMemCpy( +                SI->getPointerOperand(), findStoreAlignment(DL, SI), +                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, +                SI->isVolatile()); -          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI -                       << " => " << *M << "\n"); +          LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " +                            << *M << "\n");            MD->removeInstruction(SI);            SI->eraseFromParent(); @@ -760,7 +770,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {        auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,                                       Size, Align, SI->isVolatile()); -      DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); +      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");        MD->removeInstruction(SI);        SI->eraseFromParent(); @@ -1047,20 +1057,17 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,    // If all checks passed, then we can transform M. -  // Make sure to use the lesser of the alignment of the source and the dest -  // since we're changing where we're reading from, but don't want to increase -  // the alignment past what can be read from or written to.    // TODO: Is this worth it if we're creating a less aligned memcpy? For    // example we could be moving from movaps -> movq on x86. -  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); -    IRBuilder<> Builder(M);    if (UseMemMove) -    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), -                          Align, M->isVolatile()); +    Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(), +                          MDep->getRawSource(), MDep->getSourceAlignment(), +                          M->getLength(), M->isVolatile());    else -    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), -                         Align, M->isVolatile()); +    Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(), +                         MDep->getRawSource(), MDep->getSourceAlignment(), +                         M->getLength(), M->isVolatile());    // Remove the instruction we're replacing.    MD->removeInstruction(M); @@ -1106,7 +1113,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,    // If Dest is aligned, and SrcSize is constant, use the minimum alignment    // of the sum.    const unsigned DestAlign = -      std::max(MemSet->getAlignment(), MemCpy->getAlignment()); +      std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());    if (DestAlign > 1)      if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))        Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); @@ -1166,7 +1173,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,    IRBuilder<> Builder(MemCpy);    Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), -                       CopySize, MemCpy->getAlignment()); +                       CopySize, MemCpy->getDestAlignment());    return true;  } @@ -1192,7 +1199,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {        if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {          IRBuilder<> Builder(M);          Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), -                             M->getAlignment(), false); +                             M->getDestAlignment(), false);          MD->removeInstruction(M);          M->eraseFromParent();          ++NumCpyToSet; @@ -1221,8 +1228,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {    //   d) memcpy from a just-memset'd source can be turned into memset.    if (DepInfo.isClobber()) {      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { +      // FIXME: Can we pass in either of dest/src alignment here instead +      // of conservatively taking the minimum? +      unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment());        if (performCallSlotOptzn(M, M->getDest(), M->getSource(), -                               CopySize->getZExtValue(), M->getAlignment(), +                               CopySize->getZExtValue(), Align,                                 C)) {          MD->removeInstruction(M);          M->eraseFromParent(); @@ -1284,8 +1294,8 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {                      MemoryLocation::getForSource(M)))      return false; -  DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M -               << "\n"); +  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M +                    << "\n");    // If not, then we know we can transform this.    Type *ArgTys[3] = { M->getRawDest()->getType(), @@ -1337,7 +1347,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {    // source of the memcpy to the alignment we need.  If we fail, we bail out.    AssumptionCache &AC = LookupAssumptionCache();    DominatorTree &DT = LookupDomTree(); -  if (MDep->getAlignment() < ByValAlign && +  if (MDep->getSourceAlignment() < ByValAlign &&        getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,                                   CS.getInstruction(), &AC, &DT) < ByValAlign)      return false; @@ -1367,9 +1377,9 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {      TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),                                "tmpcast", CS.getInstruction()); -  DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" -               << "  " << *MDep << "\n" -               << "  " << *CS.getInstruction() << "\n"); +  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" +                    << "  " << *MDep << "\n" +                    << "  " << *CS.getInstruction() << "\n");    // Otherwise we're good!  Update the byval argument.    CS.setArgument(ArgNo, TmpCast); @@ -1381,10 +1391,19 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {  bool MemCpyOptPass::iterateOnFunction(Function &F) {    bool MadeChange = false; +  DominatorTree &DT = LookupDomTree(); +    // Walk all instruction in the function.    for (BasicBlock &BB : F) { +    // Skip unreachable blocks. For example processStore assumes that an +    // instruction in a BB can't be dominated by a later instruction in the +    // same BB (which is a scenario that can happen for an unreachable BB that +    // has itself as a predecessor). +    if (!DT.isReachableFromEntry(&BB)) +      continue; +      for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { -      // Avoid invalidating the iterator. +        // Avoid invalidating the iterator.        Instruction *I = &*BI++;        bool RepeatInstruction = false;  | 
