author    Dimitry Andric <dim@FreeBSD.org>    2015-12-30 11:46:15 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2015-12-30 11:46:15 +0000
commit    dd58ef019b700900793a1eb48b52123db01b654e (patch)
tree      fcfbb4df56a744f4ddc6122c50521dd3f1c5e196 /lib/Transforms/Scalar/MemCpyOptimizer.cpp
parent    2fe5752e3a7c345cdb59e869278d36af33c13fa4 (diff)
Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp  174
1 file changed, 85 insertions(+), 89 deletions(-)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 85012afc80ac..0333bf2284e1 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,7 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <list>
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
return Offset;
}
-/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
-/// constant offset, and return that constant offset. For example, Ptr1 might
-/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
+/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
+/// might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
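
A note on the doc comment's arithmetic: with 4-byte ints, &A[40] sits 8 bytes
before &A[42], which is where the -8 comes from. A standalone illustration,
not pass code:

  // Standalone illustration of the -8 in the doc comment (not pass code).
  #include <cstddef>
  #include <cstdio>
  int main() {
    int A[64];
    std::ptrdiff_t Bytes = reinterpret_cast<char *>(&A[40]) -
                           reinterpret_cast<char *>(&A[42]);
    std::printf("%td\n", Bytes); // prints -8 (two 4-byte elements apart)
    return 0;
  }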
@@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
}
-/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
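
In source terms, the store list in the doc comment corresponds to a shape like
the following (illustrative sketch, names hypothetical):

  // Two adjacent stores of the same byte value, which MemsetRanges can
  // recognize and later fold into a single memset(P, 0, 2).
  void zeroPair(char *P) {
    P[1] = 0; // store 0 -> P+1
    P[0] = 0; // store 0 -> P+0
  }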
@@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If any of the stores are a memset, then it is always good to extend the
// memset.
- for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
- if (!isa<StoreInst>(TheStores[i]))
+ for (Instruction *SI : TheStores)
+ if (!isa<StoreInst>(SI))
return true;
// Assume that the code generator is capable of merging pairs of stores
@@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
+ unsigned NumByteStores = Bytes % MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
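
The NumByteStores rewrite above is behavior-preserving: since NumPointerStores
is Bytes / MaxIntSize with integer division, Bytes - NumPointerStores *
MaxIntSize is exactly Bytes % MaxIntSize. A quick check with illustrative
values:

  #include <cassert>
  int main() {
    unsigned Bytes = 13, MaxIntSize = 4;
    unsigned NumPointerStores = Bytes / MaxIntSize;      // 3
    assert(Bytes - NumPointerStores * MaxIntSize == 1);  // old expression
    assert(Bytes % MaxIntSize == 1);                     // new expression
    return 0;
  }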
@@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
namespace {
class MemsetRanges {
- /// Ranges - A sorted list of the memset ranges. We use std::list here
- /// because each element is relatively large and expensive to copy.
- std::list<MemsetRange> Ranges;
- typedef std::list<MemsetRange>::iterator range_iterator;
+ /// A sorted list of the memset ranges.
+ SmallVector<MemsetRange, 8> Ranges;
+ typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
- typedef std::list<MemsetRange>::const_iterator const_iterator;
+ typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
} // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = Ranges.begin(), E = Ranges.end();
- while (I != E && Start > I->End)
- ++I;
+ range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+ [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
- if (I == E || End < I->Start) {
+ if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
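
The linear walk becomes a binary search, which is valid because Ranges stays
sorted by End and the ranges are non-overlapping; this is also why the include
swap above trades <list> for <algorithm>. A minimal self-contained sketch of
the same search over a simplified Range type:

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  struct Range { int64_t Start, End; };

  // Find the first range whose End is not below Start; every range before
  // the returned iterator ends strictly before the new interval begins.
  std::vector<Range>::iterator findMergePoint(std::vector<Range> &Ranges,
                                              int64_t Start) {
    return std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &LHS, int64_t RHS) { return LHS.End < RHS; });
  }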
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
- while (++NextI != E && End >= NextI->Start) {
+ while (++NextI != Ranges.end() && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
@@ -331,9 +325,9 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -357,7 +351,7 @@ namespace {
char MemCpyOpt::ID = 0;
}
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away. In particular, this looks for stores to neighboring locations of
+/// memory. If it sees enough consecutive ones, it attempts to merge them
+/// together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// are stored.
MemsetRanges Ranges(DL);
- BasicBlock::iterator BI = StartInst;
+ BasicBlock::iterator BI(StartInst);
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we create any memsets, we put it right before the first instruction that
// isn't part of the memset block. This ensure that the memset is dominated
// by any addressing instruction needed by the start of the block.
- IRBuilder<> Builder(BI);
+ IRBuilder<> Builder(&*BI);
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
- for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
- I != E; ++I) {
- const MemsetRange &Range = *I;
+ for (const MemsetRange &Range : Ranges) {
if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
- for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i] << '\n';
+ for (Instruction *SI : Range.TheStores)
+ dbgs() << *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVectorImpl<Instruction *>::const_iterator
- SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI) {
- MD->removeInstruction(*SI);
- (*SI)->eraseFromParent();
+ for (Instruction *SI : Range.TheStores) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
}
++NumMemSetInfer;
}
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
+
+ // Avoid merging nontemporal stores since the resulting
+ // memcpy/memset would not be able to preserve the nontemporal hint.
+ // In theory we could teach how to propagate the !nontemporal metadata to
+ // memset calls. However, that change would force the backend to
+ // conservatively expand !nontemporal memset calls back to sequences of
+ // store instructions (effectively undoing the merging).
+ if (SI->getMetadata(LLVMContext::MD_nontemporal))
+ return false;
+
const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
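
One common origin of the !nontemporal stores that the new early-out above
refuses to merge is a streaming-store intrinsic, whose whole point is the
cache-bypass hint a plain memset would lose. An illustrative example, not from
the patch (the exact lowering to !nontemporal metadata is compiler-dependent):

  #include <emmintrin.h>
  // SSE2 streaming stores; typically lowered to stores carrying a
  // nontemporal hint, which merging into a memset would drop.
  void streamZero(int *P) {
    _mm_stream_si32(P, 0);
    _mm_stream_si32(P + 1, 0);
  }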
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
- E = C; I != E; --I) {
- if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+ I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
C = nullptr;
break;
}
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
MSI->getValue())) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
return false;
}
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
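
In source terms, the situation performCallSlotOptzn targets looks roughly like
this (illustrative sketch, names hypothetical): a call writes its result into
a temporary that is then copied wholesale into the real destination, so the
call may as well write there directly:

  struct Big { int Data[32]; };
  Big produce(); // writes its (sret) result into a caller-provided slot

  void consume(Big *Dest) {
    Big Tmp = produce();
    *Dest = Tmp; // the copy; call slot opt lets produce() target *Dest
  }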
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
// If necessary, perform additional analysis.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_invariant_group};
combineMetadata(C, cpy, KnownIDs);
// Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return true;
}
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false, M, M->getParent());
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return false;
// Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+ MemCpy->getIterator(), MemCpy->getParent());
if (DstDepInfo.getInst() != MemSet)
return false;
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
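
In C terms the rewrite described above is, assuming Y is not modified between
the two copies and the second copy is no larger than the first (sizes
illustrative):

  #include <cstring>
  void forward(char *X, char *Y, char *Z) {
    std::memcpy(Y, X, 64);
    std::memcpy(Z, Y, 32); // rewritten to: std::memcpy(Z, X, 32);
    // If nothing else reads Y, later passes can then drop the first copy.
  }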
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
- M, M->getParent());
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return false;
}
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
if (!TLI->has(LibFunc::memmove))
return false;
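
The transformation is sound because memmove is only required when its operands
may overlap; once AA proves they do not, the call is equivalent to the cheaper
memcpy. Sketched in C++, where the no-overlap precondition stands in for what
the pass proves:

  #include <cstddef>
  #include <cstring>
  // Precondition: [Dst, Dst+N) and [Src, Src+N) do not overlap.
  void copyNoAlias(char *Dst, const char *Src, std::size_t N) {
    std::memmove(Dst, Src, N); // equivalent here to std::memcpy(Dst, Src, N)
  }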
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
// If not, then we know we can transform this.
- Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys));
+ M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+ Intrinsic::memcpy, ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
return true;
}
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
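
The byval pattern in question, sketched in C++ (names and sizes hypothetical):
passing a large struct by value makes the frontend copy it into a hidden
temporary; if the original is not written before the call, that copy can be
forwarded:

  struct Big { int Data[32]; };
  void callee(Big B); // lowered to a byval pointer argument on many ABIs

  void caller(Big *P) {
    callee(*P); // conceptually: memcpy(Tmp, P, sizeof(Big)); callee(&Tmp);
  }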
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
- CS.getInstruction()->getParent());
+ MemoryLocation(ByValArg, ByValSize), true,
+ CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- CS.getInstruction(), MDep->getParent());
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CS.getInstruction()->getIterator(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
return true;
}
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
bool RepeatInstruction = false;
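
The `Instruction *I = &*BI++;` idiom advances the iterator before the current
instruction can be erased, so erasure never invalidates the loop. The same
shape with a plain standard container (not LLVM code):

  #include <list>
  void eraseEvens(std::list<int> &L) {
    for (auto It = L.begin(); It != L.end();) {
      auto Cur = It++;  // grab the element, then advance, like &*BI++
      if (*Cur % 2 == 0)
        L.erase(Cur);   // erasing Cur leaves It valid
    }
  }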
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
return MadeChange;
}
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
bool MemCpyOpt::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;