about summary refs log tree commit diff
path: root/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r-- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp 185
1 files changed, 101 insertions, 84 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6698db26626b..1f5bc69acecd 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,14 +28,12 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -45,7 +43,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -61,15 +58,13 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <utility>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
- "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "enable-memcpyopt-without-libcalls", cl::Hidden,
cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
@@ -100,7 +95,7 @@ struct MemsetRange {
Value *StartPtr;
/// Alignment - The known alignment of the first store.
- unsigned Alignment;
+ MaybeAlign Alignment;
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
@@ -182,16 +177,16 @@ public:
TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
- SI->getAlign().value(), SI);
+ SI->getAlign(), SI);
}
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
}
- void addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst);
+ void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
+ Instruction *Inst);
};
} // end anonymous namespace
@@ -200,7 +195,7 @@ public:
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst) {
+ MaybeAlign Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = partition_point(
@@ -352,9 +347,25 @@ static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
-static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
+static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
+ MemoryLocation Loc, const MemoryUseOrDef *Start,
const MemoryUseOrDef *End) {
+ if (isa<MemoryUse>(End)) {
+ // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
+ // Manually check read accesses between Start and End, if they are in the
+ // same block, for clobbers. Otherwise assume Loc is clobbered.
+ return Start->getBlock() != End->getBlock() ||
+ any_of(
+ make_range(std::next(Start->getIterator()), End->getIterator()),
+ [&AA, Loc](const MemoryAccess &Acc) {
+ if (isa<MemoryUse>(&Acc))
+ return false;
+ Instruction *AccInst =
+ cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
+ return isModSet(AA.getModRefInfo(AccInst, Loc));
+ });
+ }
+
// TODO: Only walk until we hit Start.
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
End->getDefiningAccess(), Loc);
@@ -492,7 +503,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
StartPtr = Range.StartPtr;
AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
- MaybeAlign(Range.Alignment));
+ Range.Alignment);
LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
: Range.TheStores) dbgs()
<< *SI << '\n';
@@ -749,36 +760,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
- CallInst *C = nullptr;
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load most post-dom the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
-
- if (C) {
- // Check that nothing touches the dest of the "copy" between
- // the call and the store.
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- }
+ auto GetCall = [&]() -> CallInst * {
+ // We defer this expensive clobber walk until the cheap checks
+ // have been done on the source inside performCallSlotOptzn.
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI)))
+ return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ return nullptr;
+ };
- if (C) {
- bool changed = performCallSlotOptzn(
- LI, SI, SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- commonAlignment(SI->getAlign(), LI->getAlign()), C);
- if (changed) {
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- return true;
- }
+ bool changed = performCallSlotOptzn(
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(SI->getAlign(), LI->getAlign()), GetCall);
+ if (changed) {
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
@@ -853,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, TypeSize cpySize,
- Align cpyAlign, CallInst *C) {
+ Align cpyAlign,
+ std::function<CallInst *()> GetC) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -872,11 +873,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize.isScalable())
return false;
- // Lifetime marks shouldn't be operated on.
- if (Function *F = C->getCalledFunction())
- if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
@@ -893,6 +889,33 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize < srcSize)
return false;
+ CallInst *C = GetC();
+ if (!C)
+ return false;
+
+ // Lifetime marks shouldn't be operated on.
+ if (Function *F = C->getCalledFunction())
+ if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
+ return false;
+
+
+ if (C->getParent() != cpyStore->getParent()) {
+ LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
+ return false;
+ }
+
+ MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
+ MemoryLocation::get(cpyStore) :
+ MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
+
+ // Check that nothing touches the dest of the copy between
+ // the call and the store/memcpy.
+ if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(cpyStore))) {
+ LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
+ return false;
+ }
+
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
@@ -902,6 +925,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
}
+
// Make sure that nothing can observe cpyDest being written early. There are
// a number of cases to consider:
// 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
@@ -1118,7 +1142,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// then we could still perform the xform by moving M up to the first memcpy.
// TODO: It would be sufficient to check the MDep source up to the memcpy
// size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
return false;
@@ -1215,14 +1239,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
}
// By default, create an unaligned memset.
- unsigned Align = 1;
+ Align Alignment = Align(1);
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
// of the sum.
- const unsigned DestAlign =
- std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
+ const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
+ MemCpy->getDestAlign().valueOrOne());
if (DestAlign > 1)
if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
- Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+ Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
IRBuilder<> Builder(MemCpy);
@@ -1241,11 +1265,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
unsigned DestAS = Dest->getType()->getPointerAddressSpace();
Instruction *NewMemSet = Builder.CreateMemSet(
- Builder.CreateGEP(Builder.getInt8Ty(),
- Builder.CreatePointerCast(Dest,
- Builder.getInt8PtrTy(DestAS)),
- SrcSize),
- MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
+ Builder.CreateGEP(
+ Builder.getInt8Ty(),
+ Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
+ SrcSize),
+ MemSet->getOperand(1), MemsetLen, Alignment);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
"MemCpy must be a MemoryDef");
@@ -1402,7 +1426,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
+ MemoryAccess *AnyClobber = MA->getDefiningAccess();
MemoryLocation DestLoc = MemoryLocation::getForDest(M);
const MemoryAccess *DestClobber =
MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
@@ -1431,28 +1456,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(
- M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ [C]() -> CallInst * { return C; })) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
@@ -1557,7 +1574,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
return false;