aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LoopUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUtils.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp334
1 files changed, 209 insertions, 125 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index f0f423e9812a..e4d78f9ada08 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -54,16 +54,10 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-static cl::opt<bool> ForceReductionIntrinsic(
- "force-reduction-intrinsics", cl::Hidden,
- cl::desc("Force creating reduction intrinsics for testing."),
- cl::init(false));
-
#define DEBUG_TYPE "loop-utils"
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
-static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -260,50 +254,8 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
TheLoop->setLoopID(NewLoopID);
}
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise. If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- // When the value is absent it is interpreted as 'attribute set'.
- return true;
- case 2:
- if (ConstantInt *IntMD =
- mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
- return IntMD->getZExtValue();
- return true;
- }
- llvm_unreachable("unexpected number of options");
-}
-
-bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
- return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
Optional<ElementCount>
-llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
Optional<int> Width =
getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
@@ -316,20 +268,6 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
return None;
}
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
- StringRef Name) {
- const MDOperand *AttrMD =
- findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
- if (!AttrMD)
- return None;
-
- ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
- if (!IntMD)
- return None;
-
- return IntMD->getSExtValue();
-}
-
Optional<MDNode *> llvm::makeFollowupLoopID(
MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
@@ -419,11 +357,7 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}
-bool llvm::hasMustProgress(const Loop *L) {
- return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
-}
-
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
@@ -444,7 +378,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
return TM_SuppressedByUser;
@@ -462,7 +396,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
Optional<bool> Enable =
getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
@@ -498,7 +432,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
return TM_ForcedByUser;
@@ -508,7 +442,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
return TM_SuppressedByUser;
@@ -789,8 +723,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
- (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
- /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+ (void)changeToUnreachable(BackedgeBB->getTerminator(),
+ /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
// Erase (and destroy) this loop instance. Handles relinking sub-loops
// and blocks within the loop as needed.
@@ -944,12 +878,6 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
break;
}
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilderBase::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
@@ -1031,14 +959,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
Value *Src, RecurKind RdxKind,
ArrayRef<Value *> RedOps) {
- unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
TargetTransformInfo::ReductionFlags RdxFlags;
RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
RdxKind == RecurKind::FMax;
RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
- if (!ForceReductionIntrinsic &&
- !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
- return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
@@ -1076,7 +1000,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src) {
+ const RecurrenceDescriptor &Desc,
+ Value *Src) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
@@ -1085,6 +1010,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
}
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+ const RecurrenceDescriptor &Desc,
+ Value *Src, Value *Start) {
+ assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+ "Unexpected reduction kind");
+ assert(Src->getType()->isVectorTy() && "Expected a vector type");
+ assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+ return B.CreateFAddReduce(Start, Src);
+}
+
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
auto *VecOp = dyn_cast<Instruction>(I);
if (!VecOp)
@@ -1587,55 +1523,31 @@ struct PointerBounds {
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
Loop *TheLoop, Instruction *Loc,
- SCEVExpander &Exp, ScalarEvolution *SE) {
- // TODO: Add helper to retrieve pointers to CG.
- Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ SCEVExpander &Exp) {
LLVMContext &Ctx = Loc->getContext();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
- << *Ptr << "\n");
- // Ptr could be in the loop body. If so, expand a new one at the correct
- // location.
- Instruction *Inst = dyn_cast<Instruction>(Ptr);
- Value *NewPtr = (Inst && TheLoop->contains(Inst))
- ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
- : Ptr;
- // We must return a half-open range, which means incrementing Sc.
- const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
- Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
- return {NewPtr, NewPtrPlusOne};
- } else {
- Value *Start = nullptr, *End = nullptr;
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
- LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
- << "\n");
- return {Start, End};
- }
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+
+ Value *Start = nullptr, *End = nullptr;
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
}
/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
- Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+ Instruction *Loc, SCEVExpander &Exp) {
SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
transform(PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerCheck &Check) {
- PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
- Second =
- expandBounds(Check.second, L, Loc, Exp, SE);
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+ Second = expandBounds(Check.second, L, Loc, Exp);
return std::make_pair(First, Second);
});
@@ -1645,12 +1557,10 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
- ScalarEvolution *SE) {
+ SCEVExpander &Exp) {
// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
- const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
- SCEVExpander Exp(*SE, DL, "induction");
- auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+ auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
LLVMContext &Ctx = Loc->getContext();
Instruction *FirstInst = nullptr;
@@ -1722,3 +1632,177 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
FirstInst = GetFirstInst(FirstInst, Check, Loc);
return std::make_pair(FirstInst, Check);
}
+
+Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
+ unsigned MSSAThreshold,
+ MemorySSA &MSSA,
+ AAResults &AA) {
+ auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L.contains(CondI))
+ return {};
+
+ SmallVector<Instruction *> InstToDuplicate;
+ InstToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L.contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ InstToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access to check for alias checks.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (InstToDuplicate.empty())
+ return {};
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ auto HasNoClobbersOnPath =
+ [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+ MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck)
+ -> Optional<IVConditionInfo> {
+ IVConditionInfo Info;
+    // First, collect all blocks in the loop that are on a path from Succ
+    // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ Info.PathIsNoop &=
+ all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L.contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ Info.PathIsNoop &= all_of(
+ *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return {};
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return {};
+
+ // MemoryUse are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+      // For a MemoryDef, check if it aliases any of the locations feeding
+      // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return {};
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ // We could also allow loops with known trip counts without mustprogress,
+ // but ScalarEvolution may not be available.
+ Info.PathIsNoop &= isMustProgress(&L);
+
+ // If the path is considered a no-op so far, check if it reaches a
+ // single exit block without any phis. This ensures no values from the
+ // loop are used outside of the loop.
+ if (Info.PathIsNoop) {
+ for (auto *Exiting : ExitingBlocks) {
+ if (!Seen.contains(Exiting))
+ continue;
+ for (auto *Succ : successors(Exiting)) {
+ if (L.contains(Succ))
+ continue;
+
+ Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+ (!Info.ExitForPath || Info.ExitForPath == Succ);
+ if (!Info.PathIsNoop)
+ break;
+ assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+ "cannot have multiple exit blocks");
+ Info.ExitForPath = Succ;
+ }
+ }
+ }
+ if (!Info.ExitForPath)
+ Info.PathIsNoop = false;
+
+ Info.InstToDuplicate = InstToDuplicate;
+ return Info;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+ return Info;
+ }
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+ return Info;
+ }
+
+ return {};
+}