author    Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
commit    eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree      44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/Scalar/MemCpyOptimizer.cpp
parent    b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 101
1 file changed, 60 insertions(+), 41 deletions(-)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a747..3b74421a47a0 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -55,7 +56,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -263,7 +263,7 @@ public:
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
}
void addRange(int64_t Start, int64_t Size, Value *Ptr,
@@ -479,10 +479,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
AMemSet =
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
- DEBUG(dbgs() << "Replace stores:\n";
- for (Instruction *SI : Range.TheStores)
- dbgs() << *SI << '\n';
- dbgs() << "With: " << *AMemSet << '\n');
+ LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
+ : Range.TheStores) dbgs()
+ << *SI << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
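These DEBUG(...) to LLVM_DEBUG(...) changes are part of the tree-wide rename of the debugging macro (the unprefixed DEBUG name clashed with out-of-tree definitions); the unusual line breaks are clang-format's doing. A minimal sketch of the idiom, with illustrative names; DEBUG_TYPE "memcpyopt" is the one this file actually defines:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "memcpyopt" // gates output under -debug-only=memcpyopt

static void reportMerge(unsigned NumStores) {
  // Expands to nothing in builds without asserts; otherwise prints only
  // when -debug (or a matching -debug-only) is passed.
  LLVM_DEBUG(llvm::dbgs() << "Merged " << NumStores << " stores\n");
}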
@@ -498,16 +498,25 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
return AMemSet;
}
-static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
- const LoadInst *LI) {
+static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
unsigned StoreAlign = SI->getAlignment();
if (!StoreAlign)
StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+ return StoreAlign;
+}
+
+static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
unsigned LoadAlign = LI->getAlignment();
if (!LoadAlign)
LoadAlign = DL.getABITypeAlignment(LI->getType());
+ return LoadAlign;
+}
- return std::min(StoreAlign, LoadAlign);
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+ const LoadInst *LI) {
+ unsigned StoreAlign = findStoreAlignment(DL, SI);
+ unsigned LoadAlign = findLoadAlignment(DL, LI);
+ return MinAlign(StoreAlign, LoadAlign);
}
// This method tries to lift a store instruction before position P.
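The switch from std::min to MinAlign in findCommonAlignment is a small correctness sharpening: alignments must be powers of two, and MinAlign returns the largest power of two dividing both arguments (the lowest set bit of A|B). For two powers of two that equals std::min, but it remains a valid alignment even when an input is not a power of two. A sketch, assuming llvm::MinAlign from llvm/Support/MathExtras.h:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void minAlignExamples() {
  assert(llvm::MinAlign(16, 4) == 4);  // agrees with std::min here
  assert(llvm::MinAlign(24, 16) == 8); // std::min would answer 16, which
                                       // does not divide 24
}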
@@ -522,7 +531,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
return false;
// Keep track of the arguments of all instructions we plan to lift
- // so we can make sure to lift them as well if apropriate.
+ // so we can make sure to lift them as well if appropriate.
DenseSet<Instruction*> Args;
if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
if (Ptr->getParent() == SI->getParent())
@@ -594,7 +603,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
// We made it, we need to lift
for (auto *I : llvm::reverse(ToLift)) {
- DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
+ LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
}
@@ -656,22 +665,23 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
UseMemMove = true;
- unsigned Align = findCommonAlignment(DL, SI, LI);
uint64_t Size = DL.getTypeStoreSize(T);
IRBuilder<> Builder(P);
Instruction *M;
if (UseMemMove)
- M = Builder.CreateMemMove(SI->getPointerOperand(),
- LI->getPointerOperand(), Size,
- Align, SI->isVolatile());
+ M = Builder.CreateMemMove(
+ SI->getPointerOperand(), findStoreAlignment(DL, SI),
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
+ SI->isVolatile());
else
- M = Builder.CreateMemCpy(SI->getPointerOperand(),
- LI->getPointerOperand(), Size,
- Align, SI->isVolatile());
+ M = Builder.CreateMemCpy(
+ SI->getPointerOperand(), findStoreAlignment(DL, SI),
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
+ SI->isVolatile());
- DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
- << " => " << *M << "\n");
+ LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
+ << *M << "\n");
MD->removeInstruction(SI);
SI->eraseFromParent();
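This hunk shows the commit's central API migration: memcpy and memmove intrinsics now carry independent destination and source alignments, so CreateMemCpy/CreateMemMove take one alignment per pointer instead of one pre-computed minimum. A hedged sketch of the new call shape (the unsigned-alignment overloads of this era; the values are illustrative):

#include "llvm/IR/IRBuilder.h"
#include <cstdint>

llvm::CallInst *emitCopy(llvm::IRBuilder<> &B, llvm::Value *Dst,
                         llvm::Value *Src, uint64_t Size) {
  // Each side keeps its own guarantee; neither is pessimized to the
  // common minimum as with the old single-alignment overload.
  return B.CreateMemCpy(Dst, /*DstAlign=*/16, Src, /*SrcAlign=*/4, Size);
}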
@@ -760,7 +770,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
Size, Align, SI->isVolatile());
- DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+ LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -1047,20 +1057,17 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// If all checks passed, then we can transform M.
- // Make sure to use the lesser of the alignment of the source and the dest
- // since we're changing where we're reading from, but don't want to increase
- // the alignment past what can be read from or written to.
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
- unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-
IRBuilder<> Builder(M);
if (UseMemMove)
- Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
- Align, M->isVolatile());
+ Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(),
+ MDep->getRawSource(), MDep->getSourceAlignment(),
+ M->getLength(), M->isVolatile());
else
- Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
- Align, M->isVolatile());
+ Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(),
+ MDep->getRawSource(), MDep->getSourceAlignment(),
+ M->getLength(), M->isVolatile());
// Remove the instruction we're replacing.
MD->removeInstruction(M);
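In source-level terms, the forwarding rewrite above is: given memcpy(b, a, n) followed by memcpy(c, b, n), make the second copy read straight from a, pairing M's destination alignment with MDep's source alignment. A hypothetical C-level rendering:

#include <cstddef>
#include <cstring>

void forwardCopy(char *a, char *b, char *c, std::size_t n) {
  std::memcpy(b, a, n); // MDep: b becomes a copy of a
  std::memcpy(c, a, n); // M after the rewrite; it was memcpy(c, b, n)
}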
@@ -1106,7 +1113,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
// of the sum.
const unsigned DestAlign =
- std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+ std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
if (DestAlign > 1)
if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
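A worked example of the "minimum alignment of the sum": the forwarded memset starts at Dest + SrcSize, and that sum is only as aligned as both terms allow. With DestAlign = 16 and SrcSize = 24, the tail offset is guaranteed only 8-byte alignment, hence MinAlign rather than DestAlign itself:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void tailAlignmentExample() {
  // Dest + 24 with Dest 16-aligned lands on a multiple of 8, not 16.
  assert(llvm::MinAlign(/*SrcSize=*/24, /*DestAlign=*/16) == 8);
}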
@@ -1166,7 +1173,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
IRBuilder<> Builder(MemCpy);
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
- CopySize, MemCpy->getAlignment());
+ CopySize, MemCpy->getDestAlignment());
return true;
}
@@ -1192,7 +1199,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
IRBuilder<> Builder(M);
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- M->getAlignment(), false);
+ M->getDestAlignment(), false);
MD->removeInstruction(M);
M->eraseFromParent();
++NumCpyToSet;
@@ -1221,8 +1228,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
// d) memcpy from a just-memset'd source can be turned into memset.
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment());
if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), M->getAlignment(),
+ CopySize->getZExtValue(), Align,
C)) {
MD->removeInstruction(M);
M->eraseFromParent();
@@ -1284,8 +1294,8 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
MemoryLocation::getForSource(M)))
return false;
- DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
- << "\n");
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
+ << "\n");
// If not, then we know we can transform this.
Type *ArgTys[3] = { M->getRawDest()->getType(),
@@ -1337,7 +1347,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
// source of the memcpy to the alignment we need. If we fail, we bail out.
AssumptionCache &AC = LookupAssumptionCache();
DominatorTree &DT = LookupDomTree();
- if (MDep->getAlignment() < ByValAlign &&
+ if (MDep->getSourceAlignment() < ByValAlign &&
getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
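The byval check now consults the memcpy's source alignment specifically, and only when that is too small falls back to getOrEnforceKnownAlignment (declared in Local.h, the include the first hunk moves) to prove a larger alignment or impose one, e.g. on an alloca. A sketch of the guard, with a hypothetical wrapper name:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/Local.h"

bool sourceAlignedEnough(llvm::MemCpyInst *MDep, unsigned ByValAlign,
                         const llvm::DataLayout &DL, llvm::Instruction *CtxI,
                         llvm::AssumptionCache *AC, llvm::DominatorTree *DT) {
  if (MDep->getSourceAlignment() >= ByValAlign)
    return true; // the intrinsic already promises enough
  // Otherwise see whether a larger alignment can be proven or enforced.
  return llvm::getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                          CtxI, AC, DT) >= ByValAlign;
}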
@@ -1367,9 +1377,9 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
"tmpcast", CS.getInstruction());
- DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
- << " " << *MDep << "\n"
- << " " << *CS.getInstruction() << "\n");
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
// Otherwise we're good! Update the byval argument.
CS.setArgument(ArgNo, TmpCast);
@@ -1381,10 +1391,19 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool MadeChange = false;
+ DominatorTree &DT = LookupDomTree();
+
// Walk all instructions in the function.
for (BasicBlock &BB : F) {
+ // Skip unreachable blocks. For example processStore assumes that an
+ // instruction in a BB can't be dominated by a later instruction in the
+ // same BB (which is a scenario that can happen for an unreachable BB that
+ // has itself as a predecessor).
+ if (!DT.isReachableFromEntry(&BB))
+ continue;
+
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- // Avoid invalidating the iterator.
+ // Avoid invalidating the iterator.
Instruction *I = &*BI++;
bool RepeatInstruction = false;
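The reachability guard added in this last hunk is the standard DominatorTree idiom: isReachableFromEntry returns false exactly for blocks with no path from the function entry, including the degenerate self-looping blocks the new comment describes. A minimal sketch:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

void walkReachableBlocks(llvm::Function &F, llvm::DominatorTree &DT) {
  for (llvm::BasicBlock &BB : F) {
    if (!DT.isReachableFromEntry(&BB))
      continue; // e.g. an unreachable block listing itself as a predecessor
    // ... visit the instructions of BB, as iterateOnFunction does ...
  }
}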