diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 185 |
1 file changed, 101 insertions, 84 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6698db26626b..1f5bc69acecd 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,14 +28,12 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -45,7 +43,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -61,15 +58,13 @@ #include <algorithm> #include <cassert> #include <cstdint> -#include <utility> using namespace llvm; #define DEBUG_TYPE "memcpyopt" static cl::opt<bool> EnableMemCpyOptWithoutLibcalls( - "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden, - cl::ZeroOrMore, + "enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled")); STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); @@ -100,7 +95,7 @@ struct MemsetRange { Value *StartPtr; /// Alignment - The known alignment of the first store. - unsigned Alignment; + MaybeAlign Alignment; /// TheStores - The actual stores that make up this range. 
SmallVector<Instruction*, 16> TheStores; @@ -182,16 +177,16 @@ public: TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); assert(!StoreSize.isScalable() && "Can't track scalable-typed stores"); addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(), - SI->getAlign().value(), SI); + SI->getAlign(), SI); } void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI); } - void addRange(int64_t Start, int64_t Size, Value *Ptr, - unsigned Alignment, Instruction *Inst); + void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment, + Instruction *Inst); }; } // end anonymous namespace @@ -200,7 +195,7 @@ public: /// new range for the specified store at the specified offset, merging into /// existing ranges as appropriate. void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, - unsigned Alignment, Instruction *Inst) { + MaybeAlign Alignment, Instruction *Inst) { int64_t End = Start+Size; range_iterator I = partition_point( @@ -352,9 +347,25 @@ static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, // Check for mod of Loc between Start and End, excluding both boundaries. // Start and End can be in different blocks. -static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, - const MemoryUseOrDef *Start, +static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA, + MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End) { + if (isa<MemoryUse>(End)) { + // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes. + // Manually check read accesses between Start and End, if they are in the + // same block, for clobbers. Otherwise assume Loc is clobbered. 
+ return Start->getBlock() != End->getBlock() || + any_of( + make_range(std::next(Start->getIterator()), End->getIterator()), + [&AA, Loc](const MemoryAccess &Acc) { + if (isa<MemoryUse>(&Acc)) + return false; + Instruction *AccInst = + cast<MemoryUseOrDef>(&Acc)->getMemoryInst(); + return isModSet(AA.getModRefInfo(AccInst, Loc)); + }); + } + // TODO: Only walk until we hit Start. MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( End->getDefiningAccess(), Loc); @@ -492,7 +503,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, StartPtr = Range.StartPtr; AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start, - MaybeAlign(Range.Alignment)); + Range.Alignment); LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) dbgs() << *SI << '\n'; @@ -749,36 +760,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than // a memcpy. - CallInst *C = nullptr; - if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); - } - - if (C) { - // Check that nothing touches the dest of the "copy" between - // the call and the store. - MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) - C = nullptr; - } + auto GetCall = [&]() -> CallInst * { + // We defer this expensive clobber walk until the cheap checks + // have been done on the source inside performCallSlotOptzn. 
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) + return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); + return nullptr; + }; - if (C) { - bool changed = performCallSlotOptzn( - LI, SI, SI->getPointerOperand()->stripPointerCasts(), - LI->getPointerOperand()->stripPointerCasts(), - DL.getTypeStoreSize(SI->getOperand(0)->getType()), - commonAlignment(SI->getAlign(), LI->getAlign()), C); - if (changed) { - eraseInstruction(SI); - eraseInstruction(LI); - ++NumMemCpyInstr; - return true; - } + bool changed = performCallSlotOptzn( + LI, SI, SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + DL.getTypeStoreSize(SI->getOperand(0)->getType()), + std::min(SI->getAlign(), LI->getAlign()), GetCall); + if (changed) { + eraseInstruction(SI); + eraseInstruction(LI); + ++NumMemCpyInstr; + return true; } } } @@ -853,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, Value *cpyDest, Value *cpySrc, TypeSize cpySize, - Align cpyAlign, CallInst *C) { + Align cpyAlign, + std::function<CallInst *()> GetC) { // The general transformation to keep in mind is // // call @func(..., src, ...) @@ -872,11 +873,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (cpySize.isScalable()) return false; - // Lifetime marks shouldn't be operated on. - if (Function *F = C->getCalledFunction()) - if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) - return false; - // Require that src be an alloca. This simplifies the reasoning considerably. auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) @@ -893,6 +889,33 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (cpySize < srcSize) return false; + CallInst *C = GetC(); + if (!C) + return false; + + // Lifetime marks shouldn't be operated on. 
+ if (Function *F = C->getCalledFunction()) + if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) + return false; + + + if (C->getParent() != cpyStore->getParent()) { + LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n"); + return false; + } + + MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ? + MemoryLocation::get(cpyStore) : + MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore)); + + // Check that nothing touches the dest of the copy between + // the call and the store/memcpy. + if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(cpyStore))) { + LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n"); + return false; + } + // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. @@ -902,6 +925,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, return false; } + // Make sure that nothing can observe cpyDest being written early. There are // a number of cases to consider: // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of @@ -1118,7 +1142,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // then we could still perform the xform by moving M up to the first memcpy. // TODO: It would be sufficient to check the MDep source up to the memcpy // size of M, rather than MDep. - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep), MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) return false; @@ -1215,14 +1239,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, } // By default, create an unaligned memset. - unsigned Align = 1; + Align Alignment = Align(1); // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. 
- const unsigned DestAlign = - std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); + const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(), + MemCpy->getDestAlign().valueOrOne()); if (DestAlign > 1) if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize)) - Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); + Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue()); IRBuilder<> Builder(MemCpy); @@ -1241,11 +1265,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); unsigned DestAS = Dest->getType()->getPointerAddressSpace(); Instruction *NewMemSet = Builder.CreateMemSet( - Builder.CreateGEP(Builder.getInt8Ty(), - Builder.CreatePointerCast(Dest, - Builder.getInt8PtrTy(DestAS)), - SrcSize), - MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); + Builder.CreateGEP( + Builder.getInt8Ty(), + Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)), + SrcSize), + MemSet->getOperand(1), MemsetLen, Alignment); assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && "MemCpy must be a MemoryDef"); @@ -1402,7 +1426,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + // FIXME: Not using getClobberingMemoryAccess() here due to PR54682. + MemoryAccess *AnyClobber = MA->getDefiningAccess(); MemoryLocation DestLoc = MemoryLocation::getForDest(M); const MemoryAccess *DestClobber = MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); @@ -1431,28 +1456,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Instruction *MI = MD->getMemoryInst()) { if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) { if (auto *C = dyn_cast<CallInst>(MI)) { - // The memcpy must post-dom the call. Limit to the same block for - // now. 
Additionally, we need to ensure that there are no accesses - // to dest between the call and the memcpy. Accesses to src will be - // checked by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn( - M, M, M->getDest(), M->getSource(), - TypeSize::getFixed(CopySize->getZExtValue()), Alignment, - C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn( + M, M, M->getDest(), M->getSource(), + TypeSize::getFixed(CopySize->getZExtValue()), Alignment, + [C]() -> CallInst * { return C; })) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; } } } @@ -1557,7 +1574,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep), MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) return false; |
