diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) |
Notes
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 196 |
1 files changed, 133 insertions, 63 deletions
diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 86c99aed4417..da46210b6fdd 100644 --- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -56,6 +56,20 @@ using namespace llvm; #define DEBUG_TYPE "loop-unroll-and-jam" +/// @{ +/// Metadata attribute names +static const char *const LLVMLoopUnrollAndJamFollowupAll = + "llvm.loop.unroll_and_jam.followup_all"; +static const char *const LLVMLoopUnrollAndJamFollowupInner = + "llvm.loop.unroll_and_jam.followup_inner"; +static const char *const LLVMLoopUnrollAndJamFollowupOuter = + "llvm.loop.unroll_and_jam.followup_outer"; +static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner = + "llvm.loop.unroll_and_jam.followup_remainder_inner"; +static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter = + "llvm.loop.unroll_and_jam.followup_remainder_outer"; +/// @} + static cl::opt<bool> AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed.")); @@ -112,11 +126,6 @@ static bool HasUnrollAndJamEnablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable"); } -// Returns true if the loop has an unroll_and_jam(disable) pragma. -static bool HasUnrollAndJamDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.disable"); -} - // If loop has an unroll_and_jam_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned UnrollAndJamCountPragmaValue(const Loop *L) { @@ -149,7 +158,26 @@ static bool computeUnrollAndJamCount( OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) { - // Check for explicit Count from the "unroll-and-jam-count" option. + // First up use computeUnrollCount from the loop unroller to get a count + // for unrolling the outer loop, plus any loops requiring explicit + // unrolling we leave to the unroller. This uses UP.Threshold / + // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values. + // We have already checked that the loop has no unroll.* pragmas. + unsigned MaxTripCount = 0; + bool UseUpperBound = false; + bool ExplicitUnroll = computeUnrollCount( + L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, + OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); + if (ExplicitUnroll || UseUpperBound) { + // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it + // for the unroller instead. + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by " + "computeUnrollCount\n"); + UP.Count = 0; + return false; + } + + // Override with any explicit Count from the "unroll-and-jam-count" option. bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0; if (UserUnrollCount) { UP.Count = UnrollAndJamCount; @@ -174,80 +202,76 @@ static bool computeUnrollAndJamCount( return true; } - // Use computeUnrollCount from the loop unroller to get a sensible count - // for the unrolling the outer loop. This uses UP.Threshold / - // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values. - // We have already checked that the loop has no unroll.* pragmas. - unsigned MaxTripCount = 0; - bool UseUpperBound = false; - bool ExplicitUnroll = computeUnrollCount( - L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, - OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); - if (ExplicitUnroll || UseUpperBound) { - // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it - // for the unroller instead. - UP.Count = 0; - return false; - } - bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L); - ExplicitUnroll = PragmaCount > 0 || PragmaEnableUnroll || UserUnrollCount; + bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount; + bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount; // If the loop has an unrolling pragma, we want to be more aggressive with // unrolling limits. - if (ExplicitUnroll && OuterTripCount != 0) + if (ExplicitUnrollAndJam) UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold; if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= UP.UnrollAndJamInnerLoopThreshold) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and " + "inner loop too large\n"); UP.Count = 0; return false; } + // We have a sensible limit for the outer loop, now adjust it for the inner + // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set + // explicitly, we want to stick to it. + if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) { + while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= + UP.UnrollAndJamInnerLoopThreshold) + UP.Count--; + } + + // If we are explicitly unroll and jamming, we are done. Otherwise there are a + // number of extra performance heuristics to check. + if (ExplicitUnrollAndJam) + return true; + // If the inner loop count is known and small, leave the entire loop nest to // be the unroller - if (!ExplicitUnroll && InnerTripCount && - InnerLoopSize * InnerTripCount < UP.Threshold) { + if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is " + "being left for the unroller\n"); UP.Count = 0; return false; } - // We have a sensible limit for the outer loop, now adjust it for the inner - // loop and UP.UnrollAndJamInnerLoopThreshold. - while (UP.Count != 0 && UP.AllowRemainder && - getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= - UP.UnrollAndJamInnerLoopThreshold) - UP.Count--; - - if (!ExplicitUnroll) { - // Check for situations where UnJ is likely to be unprofitable. Including - // subloops with more than 1 block. - if (SubLoop->getBlocks().size() != 1) { - UP.Count = 0; - return false; - } + // Check for situations where UnJ is likely to be unprofitable. Including + // subloops with more than 1 block. + if (SubLoop->getBlocks().size() != 1) { + LLVM_DEBUG( + dbgs() << "Won't unroll-and-jam; More than one inner loop block\n"); + UP.Count = 0; + return false; + } - // Limit to loops where there is something to gain from unrolling and - // jamming the loop. In this case, look for loads that are invariant in the - // outer loop and can become shared. - unsigned NumInvariant = 0; - for (BasicBlock *BB : SubLoop->getBlocks()) { - for (Instruction &I : *BB) { - if (auto *Ld = dyn_cast<LoadInst>(&I)) { - Value *V = Ld->getPointerOperand(); - const SCEV *LSCEV = SE.getSCEVAtScope(V, L); - if (SE.isLoopInvariant(LSCEV, L)) - NumInvariant++; - } + // Limit to loops where there is something to gain from unrolling and + // jamming the loop. In this case, look for loads that are invariant in the + // outer loop and can become shared. + unsigned NumInvariant = 0; + for (BasicBlock *BB : SubLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (auto *Ld = dyn_cast<LoadInst>(&I)) { + Value *V = Ld->getPointerOperand(); + const SCEV *LSCEV = SE.getSCEVAtScope(V, L); + if (SE.isLoopInvariant(LSCEV, L)) + NumInvariant++; } } - if (NumInvariant == 0) { - UP.Count = 0; - return false; - } + } + if (NumInvariant == 0) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n"); + UP.Count = 0; + return false; } - return ExplicitUnroll; + return false; } static LoopUnrollResult @@ -284,13 +308,16 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); + TransformationMode EnableMode = hasUnrollAndJamTransformation(L); + if (EnableMode & TM_Disable) + return LoopUnrollResult::Unmodified; + // A loop with any unroll pragma (enabling/disabling/count/etc) is left for // the unroller, so long as it does not explicitly have unroll_and_jam // metadata. This means #pragma nounroll will disable unroll and jam as well // as unrolling - if (HasUnrollAndJamDisablePragma(L) || - (HasAnyUnrollPragma(L, "llvm.loop.unroll.") && - !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam."))) { + if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") && + !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) { LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n"); return LoopUnrollResult::Unmodified; } @@ -329,6 +356,19 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return LoopUnrollResult::Unmodified; } + // Save original loop IDs for after the transformation. + MDNode *OrigOuterLoopID = L->getLoopID(); + MDNode *OrigSubLoopID = SubLoop->getLoopID(); + + // To assign the loop id of the epilogue, assign it before unrolling it so it + // is applied to every inner loop of the epilogue. We later apply the loop ID + // for the jammed inner loop. + Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID( + OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupRemainderInner}); + if (NewInnerEpilogueLoopID.hasValue()) + SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue()); + // Find trip count and trip multiple unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch); unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch); @@ -344,9 +384,39 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (OuterTripCount && UP.Count > OuterTripCount) UP.Count = OuterTripCount; - LoopUnrollResult UnrollResult = - UnrollAndJamLoop(L, UP.Count, OuterTripCount, OuterTripMultiple, - UP.UnrollRemainder, LI, &SE, &DT, &AC, &ORE); + Loop *EpilogueOuterLoop = nullptr; + LoopUnrollResult UnrollResult = UnrollAndJamLoop( + L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI, + &SE, &DT, &AC, &ORE, &EpilogueOuterLoop); + + // Assign new loop attributes. + if (EpilogueOuterLoop) { + Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID( + OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupRemainderOuter}); + if (NewOuterEpilogueLoopID.hasValue()) + EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue()); + } + + Optional<MDNode *> NewInnerLoopID = + makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupInner}); + if (NewInnerLoopID.hasValue()) + SubLoop->setLoopID(NewInnerLoopID.getValue()); + else + SubLoop->setLoopID(OrigSubLoopID); + + if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) { + Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID( + OrigOuterLoopID, + {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter}); + if (NewOuterLoopID.hasValue()) { + L->setLoopID(NewOuterLoopID.getValue()); + + // Do not setLoopAlreadyUnrolled if a followup was given. + return UnrollResult; + } + } // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. |