author     Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
commit     344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree       f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/Utils/LoopUtils.cpp
parent     b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
tags       vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5
           vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e
           vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUtils.cpp')
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUtils.cpp  334
1 file changed, 209 insertions(+), 125 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index f0f423e9812a..e4d78f9ada08 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -54,16 +54,10 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-static cl::opt<bool> ForceReductionIntrinsic(
-    "force-reduction-intrinsics", cl::Hidden,
-    cl::desc("Force creating reduction intrinsics for testing."),
-    cl::init(false));
-
 #define DEBUG_TYPE "loop-utils"
 
 static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
 static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
-static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
 
 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                    MemorySSAUpdater *MSSAU,
@@ -260,50 +254,8 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
   TheLoop->setLoopID(NewLoopID);
 }
 
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise. If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
-                                                            StringRef Name) {
-  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
-  if (!MD)
-    return None;
-  switch (MD->getNumOperands()) {
-  case 1:
-    return nullptr;
-  case 2:
-    return &MD->getOperand(1);
-  default:
-    llvm_unreachable("loop metadata has 0 or 1 operand");
-  }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
-                                                   StringRef Name) {
-  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
-  if (!MD)
-    return None;
-  switch (MD->getNumOperands()) {
-  case 1:
-    // When the value is absent it is interpreted as 'attribute set'.
-    return true;
-  case 2:
-    if (ConstantInt *IntMD =
-            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
-      return IntMD->getZExtValue();
-    return true;
-  }
-  llvm_unreachable("unexpected number of options");
-}
-
-bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
-  return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
 Optional<ElementCount>
-llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
   Optional<int> Width =
       getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
@@ -316,20 +268,6 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
   return None;
 }
 
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
-                                                      StringRef Name) {
-  const MDOperand *AttrMD =
-      findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
-  if (!AttrMD)
-    return None;
-
-  ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
-  if (!IntMD)
-    return None;
-
-  return IntMD->getSExtValue();
-}
-
 Optional<MDNode *> llvm::makeFollowupLoopID(
     MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
     const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
@@ -419,11 +357,7 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
   return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
 }
 
-bool llvm::hasMustProgress(const Loop *L) {
-  return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
-}
-
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
     return TM_SuppressedByUser;
 
@@ -444,7 +378,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
     return TM_SuppressedByUser;
 
@@ -462,7 +396,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
   Optional<bool> Enable =
       getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
 
@@ -498,7 +432,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
     return TM_ForcedByUser;
 
@@ -508,7 +442,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
     return TM_SuppressedByUser;
 
@@ -789,8 +723,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
 
   DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
-  (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
-                            /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+  (void)changeToUnreachable(BackedgeBB->getTerminator(),
+                            /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
 
   // Erase (and destroy) this loop instance. Handles relinking sub-loops
   // and blocks within the loop as needed.
@@ -944,12 +878,6 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
     break;
   }
 
-  // We only match FP sequences that are 'fast', so we can unconditionally
-  // set it on any generated instructions.
-  IRBuilderBase::FastMathFlagGuard FMFG(Builder);
-  FastMathFlags FMF;
-  FMF.setFast();
-  Builder.setFastMathFlags(FMF);
   Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
   Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
   return Select;
@@ -1031,14 +959,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
                                          const TargetTransformInfo *TTI,
                                          Value *Src, RecurKind RdxKind,
                                          ArrayRef<Value *> RedOps) {
-  unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
   TargetTransformInfo::ReductionFlags RdxFlags;
   RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
                      RdxKind == RecurKind::FMax;
   RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
-  if (!ForceReductionIntrinsic &&
-      !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
-    return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
 
   auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
   switch (RdxKind) {
@@ -1076,7 +1000,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
 
 Value *llvm::createTargetReduction(IRBuilderBase &B,
                                    const TargetTransformInfo *TTI,
-                                   RecurrenceDescriptor &Desc, Value *Src) {
+                                   const RecurrenceDescriptor &Desc,
+                                   Value *Src) {
   // TODO: Support in-order reductions based on the recurrence descriptor.
   // All ops in the reduction inherit fast-math-flags from the recurrence
   // descriptor.
@@ -1085,6 +1010,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
   return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
 }
 
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+                                    const RecurrenceDescriptor &Desc,
+                                    Value *Src, Value *Start) {
+  assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+         "Unexpected reduction kind");
+  assert(Src->getType()->isVectorTy() && "Expected a vector type");
+  assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+  return B.CreateFAddReduce(Start, Src);
+}
+
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
   auto *VecOp = dyn_cast<Instruction>(I);
   if (!VecOp)
@@ -1587,55 +1523,31 @@ struct PointerBounds {
 /// in \p TheLoop. \return the values for the bounds.
 static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
                                   Loop *TheLoop, Instruction *Loc,
-                                  SCEVExpander &Exp, ScalarEvolution *SE) {
-  // TODO: Add helper to retrieve pointers to CG.
-  Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
-  const SCEV *Sc = SE->getSCEV(Ptr);
-
-  unsigned AS = Ptr->getType()->getPointerAddressSpace();
+                                  SCEVExpander &Exp) {
   LLVMContext &Ctx = Loc->getContext();
-
-  // Use this type for pointer arithmetic.
-  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
-  if (SE->isLoopInvariant(Sc, TheLoop)) {
-    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
-                      << *Ptr << "\n");
-    // Ptr could be in the loop body. If so, expand a new one at the correct
-    // location.
-    Instruction *Inst = dyn_cast<Instruction>(Ptr);
-    Value *NewPtr = (Inst && TheLoop->contains(Inst))
-                        ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
-                        : Ptr;
-    // We must return a half-open range, which means incrementing Sc.
-    const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
-    Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
-    return {NewPtr, NewPtrPlusOne};
-  } else {
-    Value *Start = nullptr, *End = nullptr;
-    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
-    Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
-    End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
-    LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
-                      << "\n");
-    return {Start, End};
-  }
+  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+
+  Value *Start = nullptr, *End = nullptr;
+  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+  return {Start, End};
 }
 
 /// Turns a collection of checks into a collection of expanded upper and
 /// lower bounds for both pointers in the check.
 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
 expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
-             Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+             Instruction *Loc, SCEVExpander &Exp) {
   SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
 
   // Here we're relying on the SCEV Expander's cache to only emit code for the
   // same bounds once.
   transform(PointerChecks, std::back_inserter(ChecksWithBounds),
             [&](const RuntimePointerCheck &Check) {
-              PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
-                            Second =
-                                expandBounds(Check.second, L, Loc, Exp, SE);
+              PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+                            Second = expandBounds(Check.second, L, Loc, Exp);
               return std::make_pair(First, Second);
             });
@@ -1645,12 +1557,10 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
 std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
     Instruction *Loc, Loop *TheLoop,
     const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
-    ScalarEvolution *SE) {
+    SCEVExpander &Exp) {
   // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
   // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
-  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  SCEVExpander Exp(*SE, DL, "induction");
-  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
 
   LLVMContext &Ctx = Loc->getContext();
   Instruction *FirstInst = nullptr;
@@ -1722,3 +1632,177 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
   FirstInst = GetFirstInst(FirstInst, Check, Loc);
   return std::make_pair(FirstInst, Check);
 }
+
+Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
+                                                      unsigned MSSAThreshold,
+                                                      MemorySSA &MSSA,
+                                                      AAResults &AA) {
+  auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
+  if (!TI || !TI->isConditional())
+    return {};
+
+  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  // The case with the condition outside the loop should already be handled
+  // earlier.
+  if (!CondI || !L.contains(CondI))
+    return {};
+
+  SmallVector<Instruction *> InstToDuplicate;
+  InstToDuplicate.push_back(CondI);
+
+  SmallVector<Value *, 4> WorkList;
+  WorkList.append(CondI->op_begin(), CondI->op_end());
+
+  SmallVector<MemoryAccess *, 4> AccessesToCheck;
+  SmallVector<MemoryLocation, 4> AccessedLocs;
+  while (!WorkList.empty()) {
+    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+    if (!I || !L.contains(I))
+      continue;
+
+    // TODO: support additional instructions.
+    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+      return {};
+
+    // Do not duplicate volatile and atomic loads.
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      if (LI->isVolatile() || LI->isAtomic())
+        return {};
+
+    InstToDuplicate.push_back(I);
+    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+        // Queue the defining access to check for alias checks.
+        AccessesToCheck.push_back(MemUse->getDefiningAccess());
+        AccessedLocs.push_back(MemoryLocation::get(I));
+      } else {
+        // MemoryDefs may clobber the location or may be atomic memory
+        // operations. Bail out.
+        return {};
+      }
+    }
+    WorkList.append(I->op_begin(), I->op_end());
+  }
+
+  if (InstToDuplicate.empty())
+    return {};
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L.getExitingBlocks(ExitingBlocks);
+  auto HasNoClobbersOnPath =
+      [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+       MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+                      SmallVector<MemoryAccess *, 4> AccessesToCheck)
+      -> Optional<IVConditionInfo> {
+    IVConditionInfo Info;
+    // First, collect all blocks in the loop that are on a path from Succ
+    // to the header.
+    SmallVector<BasicBlock *, 4> WorkList;
+    WorkList.push_back(Succ);
+    WorkList.push_back(Header);
+    SmallPtrSet<BasicBlock *, 4> Seen;
+    Seen.insert(Header);
+    Info.PathIsNoop &=
+        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+    while (!WorkList.empty()) {
+      BasicBlock *Current = WorkList.pop_back_val();
+      if (!L.contains(Current))
+        continue;
+      const auto &SeenIns = Seen.insert(Current);
+      if (!SeenIns.second)
+        continue;
+
+      Info.PathIsNoop &= all_of(
+          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+      WorkList.append(succ_begin(Current), succ_end(Current));
+    }
+
+    // Require at least 2 blocks on a path through the loop. This skips
+    // paths that directly exit the loop.
+    if (Seen.size() < 2)
+      return {};
+
+    // Next, check if there are any MemoryDefs that are on the path through
+    // the loop (in the Seen set) and they may-alias any of the locations in
+    // AccessedLocs. If that is the case, they may modify the condition and
+    // partial unswitching is not possible.
+    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+    while (!AccessesToCheck.empty()) {
+      MemoryAccess *Current = AccessesToCheck.pop_back_val();
+      auto SeenI = SeenAccesses.insert(Current);
+      if (!SeenI.second || !Seen.contains(Current->getBlock()))
+        continue;
+
+      // Bail out if exceeded the threshold.
+      if (SeenAccesses.size() >= MSSAThreshold)
+        return {};
+
+      // MemoryUse are read-only accesses.
+      if (isa<MemoryUse>(Current))
+        continue;
+
+      // For a MemoryDef, check if it aliases any of the locations feeding
+      // the original condition.
+      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+        if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
+              return isModSet(
+                  AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+            }))
+          return {};
+      }
+
+      for (Use &U : Current->uses())
+        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+    }
+
+    // We could also allow loops with known trip counts without mustprogress,
+    // but ScalarEvolution may not be available.
+    Info.PathIsNoop &= isMustProgress(&L);
+
+    // If the path is considered a no-op so far, check if it reaches a
+    // single exit block without any phis. This ensures no values from the
+    // loop are used outside of the loop.
+    if (Info.PathIsNoop) {
+      for (auto *Exiting : ExitingBlocks) {
+        if (!Seen.contains(Exiting))
+          continue;
+        for (auto *Succ : successors(Exiting)) {
+          if (L.contains(Succ))
+            continue;
+
+          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+                             (!Info.ExitForPath || Info.ExitForPath == Succ);
+          if (!Info.PathIsNoop)
+            break;
+          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+                 "cannot have multiple exit blocks");
+          Info.ExitForPath = Succ;
+        }
+      }
+    }
+    if (!Info.ExitForPath)
+      Info.PathIsNoop = false;
+
+    Info.InstToDuplicate = InstToDuplicate;
+    return Info;
+  };
+
+  // If we branch to the same successor, partial unswitching will not be
+  // beneficial.
+  if (TI->getSuccessor(0) == TI->getSuccessor(1))
+    return {};
+
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+    return Info;
+  }
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+    return Info;
+  }
+
+  return {};
+}
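
Note on the new createOrderedReduction helper added above: it is a thin wrapper around IRBuilderBase::CreateFAddReduce, which emits the llvm.vector.reduce.fadd intrinsic with a scalar start value; without a reassoc fast-math flag, that intrinsic sums the lanes strictly in order. The following minimal standalone sketch shows that same lowering in isolation. The module and function scaffolding (the names "ordered-rdx-demo" and "rdx") is hypothetical and not part of this commit; the CreateFAddReduce call is the API the hunk above relies on.

// Standalone sketch: build float rdx(float %start, <4 x float> %vec) that
// performs an ordered (in-lane-order) floating-point add reduction, i.e.
// the same IR the new createOrderedReduction helper produces.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ordered-rdx-demo", Ctx);

  auto *FloatTy = Type::getFloatTy(Ctx);
  auto *VecTy = FixedVectorType::get(FloatTy, 4);
  auto *FnTy =
      FunctionType::get(FloatTy, {FloatTy, VecTy}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "rdx", M);

  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  // No reassoc flag is set on the builder, so the emitted
  // llvm.vector.reduce.fadd keeps the additions sequential:
  // ((((start + v0) + v1) + v2) + v3).
  Value *Rdx = B.CreateFAddReduce(/*Acc=*/F->getArg(0), /*Src=*/F->getArg(1));
  B.CreateRet(Rdx);

  M.print(outs(), nullptr);
  return verifyModule(M, &errs()) ? 1 : 0;
}

Built against the LLVM libraries (for example with clang++ demo.cpp `llvm-config --cxxflags --ldflags --libs core support`), this prints a function whose body is a single call to the llvm.vector.reduce.fadd intrinsic.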