author      Dimitry Andric <dim@FreeBSD.org>    2023-07-26 19:03:47 +0000
committer   Dimitry Andric <dim@FreeBSD.org>    2023-07-26 19:04:23 +0000
commit      7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree        27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
parent      e3b557809604d036af6e00c60f012c2025b59a5e (diff)
download    src-7fa27ce4a07f19b07799a767fc29416f3b625afb.tar.gz
            src-7fa27ce4a07f19b07799a767fc29416f3b625afb.zip
Diffstat (limited to 'llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--   llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp   471
1 file changed, 383 insertions, 88 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 64846484f936..68642a01b37c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -46,13 +46,10 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
@@ -72,6 +69,7 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
 
 namespace {
 
@@ -255,54 +253,6 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
 //                         MemCpyOptLegacyPass Pass
 //===----------------------------------------------------------------------===//
 
-namespace {
-
-class MemCpyOptLegacyPass : public FunctionPass {
-  MemCpyOptPass Impl;
-public:
-  static char ID; // Pass identification, replacement for typeid
-
-  MemCpyOptLegacyPass() : FunctionPass(ID) {
-    initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override;
-private:
-  // This transformation requires dominator postdominator info
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
-    AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addRequired<AAResultsWrapperPass>();
-    AU.addPreserved<AAResultsWrapperPass>();
-    AU.addRequired<MemorySSAWrapperPass>();
-    AU.addPreserved<MemorySSAWrapperPass>();
-  }
-};
-
-} // end anonymous namespace
-
-char MemCpyOptLegacyPass::ID = 0;
-
-/// The public interface to this file...
-FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
-
-INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
-                    false, false)
-
 // Check that V is either not accessible by the caller, or unwinding cannot
 // occur between Start and End.
 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
@@ -463,7 +413,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
 
       // Check to see if this store is to a constant offset from the start ptr.
       std::optional<int64_t> Offset =
-          isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
+          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
       if (!Offset)
         break;
 
@@ -477,7 +427,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
 
      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
-         isPointerOffset(StartPtr, MSI->getDest(), DL);
+         MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;
 
@@ -781,6 +731,23 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
     return true;
   }
 
+  // If this is a load-store pair from a stack slot to a stack slot, we
+  // might be able to perform the stack-move optimization just as we do for
+  // memcpys from an alloca to an alloca.
+  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
+    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
+      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
+                                DL.getTypeStoreSize(T), BAA)) {
+        // Avoid invalidating the iterator.
+        BBI = SI->getNextNonDebugInstruction()->getIterator();
+        eraseInstruction(SI);
+        eraseInstruction(LI);
+        ++NumMemCpyInstr;
+        return true;
+      }
+    }
+  }
+
   return false;
 }
 
@@ -1200,8 +1167,14 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   // still want to eliminate the intermediate value, but we have to generate a
   // memmove instead of memcpy.
   bool UseMemMove = false;
-  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
+  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
+    // Don't convert llvm.memcpy.inline into memmove because memmove can be
+    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
+    // there is no inline version of llvm.memmove)
+    if (isa<MemCpyInlineInst>(M))
+      return false;
     UseMemMove = true;
+  }
 
   // If all checks passed, then we can transform M.
   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
@@ -1246,13 +1219,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
 /// In other words, transform:
 /// \code
 ///   memset(dst, c, dst_size);
+///   ...
 ///   memcpy(dst, src, src_size);
 /// \endcode
 /// into:
 /// \code
-///   memcpy(dst, src, src_size);
+///   ...
 ///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+///   memcpy(dst, src, src_size);
 /// \endcode
+///
+/// The memset is sunk to just before the memcpy to ensure that src_size is
+/// present when emitting the simplified memset.
 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                   MemSetInst *MemSet,
                                                   BatchAAResults &BAA) {
@@ -1300,6 +1278,15 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
 
   IRBuilder<> Builder(MemCpy);
 
+  // Preserve the debug location of the old memset for the code emitted here
+  // related to the new memset. This is correct according to the rules in
+  // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
+  // instruction location", given that we move the memset within the basic
+  // block.
+  assert(MemSet->getParent() == MemCpy->getParent() &&
+         "Preserving debug location based on moving memset within BB.");
+  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
+
   // If the sizes have different types, zext the smaller one.
   if (DestSize->getType() != SrcSize->getType()) {
     if (DestSize->getType()->getIntegerBitWidth() >
@@ -1323,9 +1310,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
          "MemCpy must be a MemoryDef");
 
-  // The new memset is inserted after the memcpy, but it is known that its
-  // defining access is the memset about to be removed which immediately
-  // precedes the memcpy.
+  // The new memset is inserted before the memcpy, and it is known that the
+  // memcpy's defining access is the memset about to be removed.
   auto *LastDef =
       cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
   auto *NewAccess = MSSAU->createMemoryAccessBefore(
@@ -1440,6 +1426,217 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }
 
+// Attempts to optimize the pattern whereby memory is copied from an alloca to
+// another alloca, where the two allocas don't have conflicting mod/ref. If
+// successful, the two allocas can be merged into one and the transfer can be
+// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
+// move operations in that language.
+//
+// Once we determine that the optimization is safe to perform, we replace all
+// uses of the destination alloca with the source alloca. We also "shrink wrap"
+// the lifetime markers of the single merged alloca to before the first use
+// and after the last use. Note that the "shrink wrapping" procedure is a safe
+// transformation only because we restrict the scope of this optimization to
+// allocas that aren't captured.
+bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
+                                          AllocaInst *DestAlloca,
+                                          AllocaInst *SrcAlloca, uint64_t Size,
+                                          BatchAAResults &BAA) {
+  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
+                    << *Store << "\n");
+
+  // Make sure the two allocas are in the same address space.
+  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
+    return false;
+  }
+
+  // 1. Check that copy is full. Calculate the static size of the allocas to be
+  // merged, bail out if we can't.
+  const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
+  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
+    return false;
+  }
+  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+  if (!DestSize || DestSize->isScalable() ||
+      Size != DestSize->getFixedValue()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+    return false;
+  }
+
+  // 2-1. Check that src and dest are static allocas, which are not affected by
+  // stacksave/stackrestore.
+  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
+      SrcAlloca->getParent() != Load->getParent() ||
+      SrcAlloca->getParent() != Store->getParent())
+    return false;
+
+  // 2-2. Check that src and dest are never captured, unescaped allocas. Also
+  // collect lifetime markers first/last users in order to shrink wrap the
+  // lifetimes, and instructions with noalias metadata to remove them.
+
+  SmallVector<Instruction *, 4> LifetimeMarkers;
+  Instruction *FirstUser = nullptr, *LastUser = nullptr;
+  SmallSet<Instruction *, 4> NoAliasInstrs;
+
+  // Recursively track the user and check whether modified alias exist.
+  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
+    bool CanBeNull, CanBeFreed;
+    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+  };
+
+  auto CaptureTrackingWithModRef =
+      [&](Instruction *AI,
+          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+    SmallVector<Instruction *, 8> Worklist;
+    Worklist.push_back(AI);
+    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
+    Worklist.reserve(MaxUsesToExplore);
+    SmallSet<const Use *, 20> Visited;
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.back();
+      Worklist.pop_back();
+      for (const Use &U : I->uses()) {
+        if (Visited.size() >= MaxUsesToExplore) {
+          LLVM_DEBUG(
+              dbgs()
+              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
+          return false;
+        }
+        if (!Visited.insert(&U).second)
+          continue;
+        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+        case UseCaptureKind::MAY_CAPTURE:
+          return false;
+        case UseCaptureKind::PASSTHROUGH:
+          // Instructions cannot have non-instruction users.
+          Worklist.push_back(cast<Instruction>(U.getUser()));
+          continue;
+        case UseCaptureKind::NO_CAPTURE: {
+          auto *UI = cast<Instruction>(U.getUser());
+          if (DestAlloca->getParent() != UI->getParent())
+            return false;
+          if (!FirstUser || UI->comesBefore(FirstUser))
+            FirstUser = UI;
+          if (!LastUser || LastUser->comesBefore(UI))
+            LastUser = UI;
+          if (UI->isLifetimeStartOrEnd()) {
+            // We note the locations of these intrinsic calls so that we can
+            // delete them later if the optimization succeeds, this is safe
+            // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
+            // conceptually fill all the bytes of the alloca with an undefined
+            // value.
+            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
+            if (Size < 0 || Size == DestSize) {
+              LifetimeMarkers.push_back(UI);
+              continue;
+            }
+          }
+          if (UI->hasMetadata(LLVMContext::MD_noalias))
+            NoAliasInstrs.insert(UI);
+          if (!ModRefCallback(UI))
+            return false;
+        }
+        }
+      }
+    }
+    return true;
+  };
+
+  // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
+  // from the alloca to the Store.
+  ModRefInfo DestModRef = ModRefInfo::NoModRef;
+  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+  auto DestModRefCallback = [&](Instruction *UI) -> bool {
+    // We don't care about the store itself.
+    if (UI == Store)
+      return true;
+    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+    // FIXME: For multi-BB cases, we need to see reachability from it to
+    // store.
+    // Bailout if Dest may have any ModRef before Store.
+    if (UI->comesBefore(Store) && isModOrRefSet(Res))
+      return false;
+    DestModRef |= BAA.getModRefInfo(UI, DestLoc);
+
+    return true;
+  };
+
+  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+    return false;
+
+  // 3. Check that, from after the Load to the end of the BB,
+  // 3-1. if the dest has any Mod, src has no Ref, and
+  // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
+  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
+
+  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
+    // Any ModRef before Load doesn't matter, also Load and Store can be
+    // ignored.
+    if (UI->comesBefore(Load) || UI == Load || UI == Store)
+      return true;
+    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
+    if ((isModSet(DestModRef) && isRefSet(Res)) ||
+        (isRefSet(DestModRef) && isModSet(Res)))
+      return false;
+
+    return true;
+  };
+
+  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+    return false;
+
+  // We can do the transformation. First, align the allocas appropriately.
+  SrcAlloca->setAlignment(
+      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+
+  // Merge the two allocas.
+  DestAlloca->replaceAllUsesWith(SrcAlloca);
+  eraseInstruction(DestAlloca);
+
+  // Drop metadata on the source alloca.
+  SrcAlloca->dropUnknownNonDebugMetadata();
+
+  // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
+  if (!LifetimeMarkers.empty()) {
+    LLVMContext &C = SrcAlloca->getContext();
+    IRBuilder<> Builder(C);
+
+    ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
+    // Create a new lifetime start marker before the first user of src or alloca
+    // users.
+    Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
+    Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+
+    // Create a new lifetime end marker after the last user of src or alloca
+    // users.
+    // FIXME: If the last user is the terminator for the bb, we can insert
+    // lifetime.end marker to the immidiate post-dominator, but currently do
+    // nothing.
+    if (!LastUser->isTerminator()) {
+      Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
+      Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+    }
+
+    // Remove all other lifetime markers.
+    for (Instruction *I : LifetimeMarkers)
+      eraseInstruction(I);
+  }
+
+  // As this transformation can cause memory accesses that didn't previously
+  // alias to begin to alias one another, we remove !noalias metadata from any
+  // uses of either alloca. This is conservative, but more precision doesn't
+  // seem worthwhile right now.
+  for (Instruction *I : NoAliasInstrs)
+    I->setMetadata(LLVMContext::MD_noalias, nullptr);
+
+  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+  NumStackMove++;
+  return true;
+}
+
 /// Perform simplification of memcpy's. If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1484,8 +1681,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
 
     // Try to turn a partially redundant memset + memcpy into
-    // memcpy + smaller memset. We don't need the memcpy size for this.
-    // The memcpy most post-dom the memset, so limit this to the same basic
+    // smaller memset + memcpy. We don't need the memcpy size for this.
+    // The memcpy must post-dom the memset, so limit this to the same basic
    // block. A non-local generalization is likely not worthwhile.
    if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
      if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
@@ -1496,13 +1693,14 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
       AnyClobber, MemoryLocation::getForSource(M), BAA);
 
-  // There are four possible optimizations we can do for memcpy:
+  // There are five possible optimizations we can do for memcpy:
   // a) memcpy-memcpy xform which exposes redundance for DSE.
   // b) call-memcpy xform for return slot optimization.
   // c) memcpy from freshly alloca'd space or space that has just started
   //    its lifetime copies undefined data, and we can therefore eliminate
   //    the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
+  // e) elimination of memcpy via stack-move optimization.
   if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
     if (Instruction *MI = MD->getMemoryInst()) {
       if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1521,7 +1719,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         }
       }
       if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-        return processMemCpyMemCpyDependence(M, MDep, BAA);
+        if (processMemCpyMemCpyDependence(M, MDep, BAA))
+          return true;
       if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
         if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
           LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1540,6 +1739,27 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     }
   }
 
+  // If the transfer is from a stack slot to a stack slot, then we may be able
+  // to perform the stack-move optimization. See the comments in
+  // performStackMoveOptzn() for more details.
+  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
+  if (!DestAlloca)
+    return false;
+  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
+  if (!SrcAlloca)
+    return false;
+  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
+  if (Len == nullptr)
+    return false;
+  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
+                            BAA)) {
+    // Avoid invalidating the iterator.
+    BBI = M->getNextNonDebugInstruction()->getIterator();
+    eraseInstruction(M);
+    ++NumMemCpyInstr;
+    return true;
+  }
+
   return false;
 }
 
@@ -1623,24 +1843,110 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
   //    foo(*a)
   // It would be invalid to transform the second memcpy into foo(*b).
   if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
-                     MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+                     MSSA->getMemoryAccess(MDep), CallAccess))
     return false;
 
-  Value *TmpCast = MDep->getSource();
-  if (MDep->getSource()->getType() != ByValArg->getType()) {
-    BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
-                                              "tmpcast", &CB);
-    // Set the tmpcast's DebugLoc to MDep's
-    TmpBitCast->setDebugLoc(MDep->getDebugLoc());
-    TmpCast = TmpBitCast;
-  }
-
   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                     << "  " << *MDep << "\n"
                     << "  " << CB << "\n");
 
   // Otherwise we're good! Update the byval argument.
-  CB.setArgOperand(ArgNo, TmpCast);
+  CB.setArgOperand(ArgNo, MDep->getSource());
+  ++NumMemCpyInstr;
+  return true;
+}
+
+/// This is called on memcpy dest pointer arguments attributed as immutable
+/// during call. Try to use memcpy source directly if all of the following
+/// conditions are satisfied.
+/// 1. The memcpy dst is neither modified during the call nor captured by the
+///    call. (if readonly, noalias, nocapture attributes on call-site.)
+/// 2. The memcpy dst is an alloca with known alignment & size.
+///    2-1. The memcpy length == the alloca size which ensures that the new
+///         pointer is dereferenceable for the required range
+///    2-2. The src pointer has alignment >= the alloca alignment or can be
+///         enforced so.
+/// 3. The memcpy dst and src is not modified between the memcpy and the call.
+///    (if MSSA clobber check is safe.)
+/// 4. The memcpy src is not modified during the call. (ModRef check shows no
+///    Mod.)
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+  // 1. Ensure passed argument is immutable during call.
+  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
+        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
+    return false;
+  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+  Value *ImmutArg = CB.getArgOperand(ArgNo);
+
+  // 2. Check that arg is alloca
+  // TODO: Even if the arg gets back to branches, we can remove memcpy if all
+  // the alloca alignments can be enforced to source alignment.
+  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
+  if (!AI)
+    return false;
+
+  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
+  // Can't handle unknown size alloca.
+  // (e.g. Variable Length Array, Scalable Vector)
+  if (!AllocaSize || AllocaSize->isScalable())
+    return false;
+  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
+  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+  if (!CallAccess)
+    return false;
+
+  MemCpyInst *MDep = nullptr;
+  BatchAAResults BAA(*AA);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      CallAccess->getDefiningAccess(), Loc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
+  // a memcpy, check that the arg equals the memcpy dest.
+  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
+    return false;
+
+  // The address space of the memcpy source must match the immut argument
+  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+      ImmutArg->getType()->getPointerAddressSpace())
+    return false;
+
+  // 2-1. The length of the memcpy must be equal to the size of the alloca.
+  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+  if (!MDepLen || AllocaSize != MDepLen->getValue())
+    return false;
+
+  // 2-2. the memcpy source align must be larger than or equal the alloca's
+  // align. If not so, we check to see if we can force the source of the memcpy
+  // to the alignment we need. If we fail, we bail out.
+  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
+  Align AllocaAlign = AI->getAlign();
+  if (MemDepAlign < AllocaAlign &&
+      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
+                                 DT) < AllocaAlign)
+    return false;
+
+  // 3. Verify that the source doesn't change in between the memcpy and
+  // the call.
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    foo(*a)
+  // It would be invalid to transform the second memcpy into foo(*b).
+  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
+                     MSSA->getMemoryAccess(MDep), CallAccess))
+    return false;
+
+  // 4. The memcpy src must not be modified during the call.
+  if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
+                    << "  " << *MDep << "\n"
+                    << "  " << CB << "\n");
+
+  // Otherwise we're good! Update the immut argument.
+  CB.setArgOperand(ArgNo, MDep->getSource());
   ++NumMemCpyInstr;
   return true;
 }
@@ -1673,9 +1979,12 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
     else if (auto *M = dyn_cast<MemMoveInst>(I))
       RepeatInstruction = processMemMove(M);
     else if (auto *CB = dyn_cast<CallBase>(I)) {
-      for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+      for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
         if (CB->isByValArgument(i))
          MadeChange |= processByValArgument(*CB, i);
+        else if (CB->onlyReadsMemory(i))
+          MadeChange |= processImmutArgument(*CB, i);
+      }
     }
 
     // Reprocess the instruction if desired.
@@ -1730,17 +2039,3 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
 
   return MadeChange;
 }
-
-/// This is the main transformation entry point for a function.
-bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
-
-  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
-}
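
The processMemSetMemCpyDependence() change in this patch keeps rewriting a memset that is partially overwritten by a later memcpy so that only the uncovered tail is still set, and it now sinks that smaller memset to just before the memcpy. A worked instance with concrete sizes may help; the numbers and function names below are illustrative only and are not taken from the commit.

  #include <cstring>

  // Before: the memset fills 128 bytes, but the memcpy then overwrites the
  // first 96 of them, so only the last 32 bytes really need the fill value.
  void before(char *dst, const char *src, char c) {
    std::memset(dst, c, 128);  // dst_size = 128
    std::memcpy(dst, src, 96); // src_size = 96
  }

  // After: dst_size - src_size = 32 trailing bytes keep the fill value, and
  // the memset is emitted immediately before the memcpy so that src_size is
  // still available at that point.
  void after(char *dst, const char *src, char c) {
    std::memset(dst + 96, c, 32);
    std::memcpy(dst, src, 96);
  }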
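The new performStackMoveOptzn() targets full-size copies between two static, non-captured allocas in the same basic block: the destination alloca is replaced by the source alloca, the copy disappears, and the merged slot's lifetime markers are shrink-wrapped around its first and last use. The C++ sketch below only illustrates the kind of source pattern that lowers to such an alloca-to-alloca memcpy (Rust move semantics produce it frequently, per the comment in the patch); the type and helper names are made up for this note.

  struct Big {
    long Data[32];
  };

  long consume(const Big &B); // hypothetical callee; it never captures &B

  long example(const Big &In) {
    Big Src = In;        // first stack slot
    Big Dst = Src;       // second stack slot; the copy becomes a memcpy of
                         // sizeof(Big) from one alloca to the other
    return consume(Dst); // Src is not used again, so Dst can reuse Src's slot
  }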
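processImmutArgument() extends the existing byval forwarding to ordinary pointer arguments that the call only reads and never captures (noalias plus nocapture on the call site, with the loop in iterateOnFunction() gating on CB->onlyReadsMemory(i)): if such an argument is a local alloca that was just filled by a memcpy of exactly its size, the call can be handed the memcpy source instead. The sketch below shows the shape of the pattern at the C++ level; the names are invented for this note, and whether the attributes are actually inferred depends on earlier analyses.

  #include <cstring>

  struct Config {
    int Fields[16];
  };

  // Hypothetical callee: it only reads *C and does not retain the pointer, so
  // the argument can end up marked readonly, noalias and nocapture in IR.
  int use(const Config *C);

  int caller(const Config *Orig) {
    Config Tmp;
    std::memcpy(&Tmp, Orig, sizeof(Config)); // defensive local copy
    return use(&Tmp); // after the transform this becomes use(Orig), and the
                      // local copy plus the memcpy can then be removed
  }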