author     Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
commit     344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree       f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/Utils/LoopUtils.cpp
parent     b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
tags       vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5
           vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e
           vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUtils.cpp')
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUtils.cpp  334
1 file changed, 209 insertions(+), 125 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index f0f423e9812a..e4d78f9ada08 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -54,16 +54,10 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-static cl::opt<bool> ForceReductionIntrinsic(
-    "force-reduction-intrinsics", cl::Hidden,
-    cl::desc("Force creating reduction intrinsics for testing."),
-    cl::init(false));
-
 #define DEBUG_TYPE "loop-utils"
 
 static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
 static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
-static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
 
 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                    MemorySSAUpdater *MSSAU,
@@ -260,50 +254,8 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
   TheLoop->setLoopID(NewLoopID);
 }
 
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise. If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
-                                                            StringRef Name) {
-  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
-  if (!MD)
-    return None;
-  switch (MD->getNumOperands()) {
-  case 1:
-    return nullptr;
-  case 2:
-    return &MD->getOperand(1);
-  default:
-    llvm_unreachable("loop metadata has 0 or 1 operand");
-  }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
-                                                   StringRef Name) {
-  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
-  if (!MD)
-    return None;
-  switch (MD->getNumOperands()) {
-  case 1:
-    // When the value is absent it is interpreted as 'attribute set'.
-    return true;
-  case 2:
-    if (ConstantInt *IntMD =
-            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
-      return IntMD->getZExtValue();
-    return true;
-  }
-  llvm_unreachable("unexpected number of options");
-}
-
-bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
-  return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
 Optional<ElementCount>
-llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
   Optional<int> Width =
       getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
@@ -316,20 +268,6 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
   return None;
 }
 
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
-                                                      StringRef Name) {
-  const MDOperand *AttrMD =
-      findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
-  if (!AttrMD)
-    return None;
-
-  ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
-  if (!IntMD)
-    return None;
-
-  return IntMD->getSExtValue();
-}
-
 Optional<MDNode *> llvm::makeFollowupLoopID(
     MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
     const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
@@ -419,11 +357,7 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
   return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
 }
 
-bool llvm::hasMustProgress(const Loop *L) {
-  return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
-}
-
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
     return TM_SuppressedByUser;
 
@@ -444,7 +378,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
     return TM_SuppressedByUser;
 
@@ -462,7 +396,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
   Optional<bool> Enable =
       getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
 
@@ -498,7 +432,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
     return TM_ForcedByUser;
 
@@ -508,7 +442,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
   return TM_Unspecified;
 }
 
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
   if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
     return TM_SuppressedByUser;
 
@@ -789,8 +723,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
 
   DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
-  (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
-                            /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+  (void)changeToUnreachable(BackedgeBB->getTerminator(),
+                            /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
 
   // Erase (and destroy) this loop instance. Handles relinking sub-loops
   // and blocks within the loop as needed.
@@ -944,12 +878,6 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
     break;
   }
 
-  // We only match FP sequences that are 'fast', so we can unconditionally
-  // set it on any generated instructions.
-  IRBuilderBase::FastMathFlagGuard FMFG(Builder);
-  FastMathFlags FMF;
-  FMF.setFast();
-  Builder.setFastMathFlags(FMF);
   Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
   Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
   return Select;
@@ -1031,14 +959,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
                                          const TargetTransformInfo *TTI,
                                          Value *Src, RecurKind RdxKind,
                                          ArrayRef<Value *> RedOps) {
-  unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
   TargetTransformInfo::ReductionFlags RdxFlags;
   RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
                      RdxKind == RecurKind::FMax;
   RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
-  if (!ForceReductionIntrinsic &&
-      !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
-    return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
 
   auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
   switch (RdxKind) {
@@ -1076,7 +1000,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
 
 Value *llvm::createTargetReduction(IRBuilderBase &B,
                                    const TargetTransformInfo *TTI,
-                                   RecurrenceDescriptor &Desc, Value *Src) {
+                                   const RecurrenceDescriptor &Desc,
+                                   Value *Src) {
   // TODO: Support in-order reductions based on the recurrence descriptor.
   // All ops in the reduction inherit fast-math-flags from the recurrence
   // descriptor.
@@ -1085,6 +1010,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
   return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
 }
 
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+                                    const RecurrenceDescriptor &Desc,
+                                    Value *Src, Value *Start) {
+  assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+         "Unexpected reduction kind");
+  assert(Src->getType()->isVectorTy() && "Expected a vector type");
+  assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+  return B.CreateFAddReduce(Start, Src);
+}
+
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
   auto *VecOp = dyn_cast<Instruction>(I);
   if (!VecOp)
@@ -1587,55 +1523,31 @@ struct PointerBounds {
 /// in \p TheLoop. \return the values for the bounds.
 static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
                                   Loop *TheLoop, Instruction *Loc,
-                                  SCEVExpander &Exp, ScalarEvolution *SE) {
-  // TODO: Add helper to retrieve pointers to CG.
-  Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
-  const SCEV *Sc = SE->getSCEV(Ptr);
-
-  unsigned AS = Ptr->getType()->getPointerAddressSpace();
+                                  SCEVExpander &Exp) {
   LLVMContext &Ctx = Loc->getContext();
-
-  // Use this type for pointer arithmetic.
-  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
-  if (SE->isLoopInvariant(Sc, TheLoop)) {
-    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
-                      << *Ptr << "\n");
-    // Ptr could be in the loop body. If so, expand a new one at the correct
-    // location.
-    Instruction *Inst = dyn_cast<Instruction>(Ptr);
-    Value *NewPtr = (Inst && TheLoop->contains(Inst))
-                        ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
-                        : Ptr;
-    // We must return a half-open range, which means incrementing Sc.
-    const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
-    Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
-    return {NewPtr, NewPtrPlusOne};
-  } else {
-    Value *Start = nullptr, *End = nullptr;
-    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
-    Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
-    End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
-    LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
-                      << "\n");
-    return {Start, End};
-  }
+  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+
+  Value *Start = nullptr, *End = nullptr;
+  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+  return {Start, End};
 }
 
 /// Turns a collection of checks into a collection of expanded upper and
 /// lower bounds for both pointers in the check.
 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
 expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
-             Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+             Instruction *Loc, SCEVExpander &Exp) {
   SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
 
   // Here we're relying on the SCEV Expander's cache to only emit code for the
   // same bounds once.
   transform(PointerChecks, std::back_inserter(ChecksWithBounds),
             [&](const RuntimePointerCheck &Check) {
-              PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
-                            Second =
-                                expandBounds(Check.second, L, Loc, Exp, SE);
+              PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+                            Second = expandBounds(Check.second, L, Loc, Exp);
               return std::make_pair(First, Second);
             });
@@ -1645,12 +1557,10 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
 std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
     Instruction *Loc, Loop *TheLoop,
     const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
-    ScalarEvolution *SE) {
+    SCEVExpander &Exp) {
   // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
   // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
-  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  SCEVExpander Exp(*SE, DL, "induction");
-  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
 
   LLVMContext &Ctx = Loc->getContext();
   Instruction *FirstInst = nullptr;
@@ -1722,3 +1632,177 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
   FirstInst = GetFirstInst(FirstInst, Check, Loc);
   return std::make_pair(FirstInst, Check);
 }
+
+Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
+                                                      unsigned MSSAThreshold,
+                                                      MemorySSA &MSSA,
+                                                      AAResults &AA) {
+  auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
+  if (!TI || !TI->isConditional())
+    return {};
+
+  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  // The case with the condition outside the loop should already be handled
+  // earlier.
+  if (!CondI || !L.contains(CondI))
+    return {};
+
+  SmallVector<Instruction *> InstToDuplicate;
+  InstToDuplicate.push_back(CondI);
+
+  SmallVector<Value *, 4> WorkList;
+  WorkList.append(CondI->op_begin(), CondI->op_end());
+
+  SmallVector<MemoryAccess *, 4> AccessesToCheck;
+  SmallVector<MemoryLocation, 4> AccessedLocs;
+  while (!WorkList.empty()) {
+    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+    if (!I || !L.contains(I))
+      continue;
+
+    // TODO: support additional instructions.
+    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+      return {};
+
+    // Do not duplicate volatile and atomic loads.
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      if (LI->isVolatile() || LI->isAtomic())
+        return {};
+
+    InstToDuplicate.push_back(I);
+    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+        // Queue the defining access to check for alias checks.
+        AccessesToCheck.push_back(MemUse->getDefiningAccess());
+        AccessedLocs.push_back(MemoryLocation::get(I));
+      } else {
+        // MemoryDefs may clobber the location or may be atomic memory
+        // operations. Bail out.
+        return {};
+      }
+    }
+    WorkList.append(I->op_begin(), I->op_end());
+  }
+
+  if (InstToDuplicate.empty())
+    return {};
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L.getExitingBlocks(ExitingBlocks);
+  auto HasNoClobbersOnPath =
+      [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+       MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+                      SmallVector<MemoryAccess *, 4> AccessesToCheck)
+      -> Optional<IVConditionInfo> {
+    IVConditionInfo Info;
+    // First, collect all blocks in the loop that are on a path from Succ
+    // to the header.
+    SmallVector<BasicBlock *, 4> WorkList;
+    WorkList.push_back(Succ);
+    WorkList.push_back(Header);
+    SmallPtrSet<BasicBlock *, 4> Seen;
+    Seen.insert(Header);
+    Info.PathIsNoop &=
+        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+    while (!WorkList.empty()) {
+      BasicBlock *Current = WorkList.pop_back_val();
+      if (!L.contains(Current))
+        continue;
+      const auto &SeenIns = Seen.insert(Current);
+      if (!SeenIns.second)
+        continue;
+
+      Info.PathIsNoop &= all_of(
+          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+      WorkList.append(succ_begin(Current), succ_end(Current));
+    }
+
+    // Require at least 2 blocks on a path through the loop. This skips
+    // paths that directly exit the loop.
+    if (Seen.size() < 2)
+      return {};
+
+    // Next, check if there are any MemoryDefs that are on the path through
+    // the loop (in the Seen set) and they may-alias any of the locations in
+    // AccessedLocs. If that is the case, they may modify the condition and
+    // partial unswitching is not possible.
+    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+    while (!AccessesToCheck.empty()) {
+      MemoryAccess *Current = AccessesToCheck.pop_back_val();
+      auto SeenI = SeenAccesses.insert(Current);
+      if (!SeenI.second || !Seen.contains(Current->getBlock()))
+        continue;
+
+      // Bail out if exceeded the threshold.
+      if (SeenAccesses.size() >= MSSAThreshold)
+        return {};
+
+      // MemoryUse are read-only accesses.
+      if (isa<MemoryUse>(Current))
+        continue;
+
+      // For a MemoryDef, check if it aliases any of the locations feeding
+      // the original condition.
+      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+        if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
+              return isModSet(
+                  AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+            }))
+          return {};
+      }
+
+      for (Use &U : Current->uses())
+        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+    }
+
+    // We could also allow loops with known trip counts without mustprogress,
+    // but ScalarEvolution may not be available.
+    Info.PathIsNoop &= isMustProgress(&L);
+
+    // If the path is considered a no-op so far, check if it reaches a
+    // single exit block without any phis. This ensures no values from the
+    // loop are used outside of the loop.
+    if (Info.PathIsNoop) {
+      for (auto *Exiting : ExitingBlocks) {
+        if (!Seen.contains(Exiting))
+          continue;
+        for (auto *Succ : successors(Exiting)) {
+          if (L.contains(Succ))
+            continue;
+
+          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+                             (!Info.ExitForPath || Info.ExitForPath == Succ);
+          if (!Info.PathIsNoop)
+            break;
+          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+                 "cannot have multiple exit blocks");
+          Info.ExitForPath = Succ;
+        }
+      }
+    }
+    if (!Info.ExitForPath)
+      Info.PathIsNoop = false;
+
+    Info.InstToDuplicate = InstToDuplicate;
+    return Info;
+  };
+
+  // If we branch to the same successor, partial unswitching will not be
+  // beneficial.
+  if (TI->getSuccessor(0) == TI->getSuccessor(1))
+    return {};
+
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+    return Info;
+  }
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+    return Info;
+  }
+
+  return {};
+}
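
Note on the new createOrderedReduction helper added above: it is a thin wrapper around IRBuilderBase::CreateFAddReduce, which emits the llvm.vector.reduce.fadd intrinsic with a scalar start value; without a reassoc fast-math flag, that intrinsic sums the lanes strictly in order. The following minimal standalone sketch shows that same lowering in isolation. The module and function scaffolding (the names "ordered-rdx-demo" and "rdx") is hypothetical and not part of this commit; the CreateFAddReduce call is the API the hunk above relies on.

// Standalone sketch: build float rdx(float %start, <4 x float> %vec) that
// performs an ordered (in-lane-order) floating-point add reduction, i.e.
// the same IR the new createOrderedReduction helper produces.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ordered-rdx-demo", Ctx);

  auto *FloatTy = Type::getFloatTy(Ctx);
  auto *VecTy = FixedVectorType::get(FloatTy, 4);
  auto *FnTy =
      FunctionType::get(FloatTy, {FloatTy, VecTy}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "rdx", M);

  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  // No reassoc flag is set on the builder, so the emitted
  // llvm.vector.reduce.fadd keeps the additions sequential:
  // ((((start + v0) + v1) + v2) + v3).
  Value *Rdx = B.CreateFAddReduce(/*Acc=*/F->getArg(0), /*Src=*/F->getArg(1));
  B.CreateRet(Rdx);

  M.print(outs(), nullptr);
  return verifyModule(M, &errs()) ? 1 : 0;
}

Built against the LLVM libraries (for example with clang++ demo.cpp `llvm-config --cxxflags --ldflags --libs core support`), this prints a function whose body is a single call to the llvm.vector.reduce.fadd intrinsic.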