diff options
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/LoopUnroll.cpp | 50 | ||||
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 8 | ||||
-rw-r--r-- | lib/Transforms/Utils/LoopUtils.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 23 |
5 files changed, 71 insertions, 28 deletions
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e551e4b47ac1f..f9a602bc268af 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -172,6 +172,36 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, return false; } +/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary +/// and adds a mapping from the original loop to the new loop to NewLoops. +/// Returns nullptr if no new loop was created and a pointer to the +/// original loop OriginalBB was part of otherwise. +const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, + BasicBlock *ClonedBB, LoopInfo *LI, + NewLoopsMap &NewLoops) { + // Figure out which loop New is in. + const Loop *OldLoop = LI->getLoopFor(OriginalBB); + assert(OldLoop && "Should (at least) be in the loop being unrolled!"); + + Loop *&NewLoop = NewLoops[OldLoop]; + if (!NewLoop) { + // Found a new sub-loop. + assert(OriginalBB == OldLoop->getHeader() && + "Header should be first in RPO"); + + Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); + assert(NewLoopParent && + "Expected parent loop before sub-loop in RPO"); + NewLoop = new Loop; + NewLoopParent->addChildLoop(NewLoop); + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return OldLoop; + } else { + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return nullptr; + } +} + /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional @@ -428,28 +458,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { - // Figure out which loop New is in. 
- const Loop *OldLoop = LI->getLoopFor(*BB); - assert(OldLoop && "Should (at least) be in the loop being unrolled!"); - - Loop *&NewLoop = NewLoops[OldLoop]; - if (!NewLoop) { - // Found a new sub-loop. - assert(*BB == OldLoop->getHeader() && - "Header should be first in RPO"); - - Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); - LoopsToSimplify.insert(NewLoop); + const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); + if (OldLoop) { + LoopsToSimplify.insert(NewLoops[OldLoop]); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } - NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 5758a415f12b2..85da3ba899a53 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -301,15 +301,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, LI->addTopLevelLoop(NewLoop); } + NewLoopsMap NewLoops; + NewLoops[L] = NewLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." 
+ suffix, F); NewBlocks.push_back(NewBB); - if (NewLoop) - NewLoop->addBasicBlockToLoop(NewBB, *LI); - else if (ParentLoop) + if (NewLoop) { + addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); + } else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 09e9f1ddc7fec..c8efa9efc7f34 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -869,8 +869,13 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return false; } - assert(TheLoop->getHeader() == Phi->getParent() && - "PHI is an AddRec for a different loop?!"); + if (AR->getLoop() != TheLoop) { + // FIXME: We should treat this as a uniform. Unfortunately, we + // don't currently know how to handle uniform PHIs. + DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); + return false; + } + Value *StartValue = Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 54390e77bb1f7..6e30919246c74 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1275,10 +1275,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_mem_parallel_loop_access}; combineMetadata(I1, I2, KnownIDs); - // If the debug loc for I1 and I2 are different, as we are combining them - // into one instruction, we do not want to select debug loc randomly from - // I1 or I2. - if (!isa<CallInst>(I1) && I1->getDebugLoc() != I2->getDebugLoc()) + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. 
+ if (!isa<CallInst>(I1)) I1->setDebugLoc( DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); @@ -1577,7 +1576,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // The debug location for the "common" instruction is the merged locations of // all the commoned instructions. We start with the original location of the // "common" instruction and iteratively merge each location in the loop below. - DILocation *Loc = I0->getDebugLoc(); + const DILocation *Loc = I0->getDebugLoc(); // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 11d54bcf4f89d..8eaeb1073a761 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1074,6 +1074,24 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); + if (Op2C->isExactlyValue(-0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl)) { + // If -ffast-math: + // pow(x, -0.5) -> 1.0 / sqrt(x) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // Here we cannot lower to an intrinsic because C99 sqrt() and llvm.sqrt + // are not guaranteed to have the same semantics. + Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip"); + } + } + if (Op2C->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, LibFunc::sqrtl) && @@ -1121,6 +1139,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { !V.isInteger()) return nullptr; + // Propagate fast math flags. 
+ IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; InnerChain[1] = Op1; @@ -1131,7 +1153,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { bool ignored; V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); - // TODO: Should the new instructions propagate the 'fast' flag of the pow()? Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. if (Op2C->isNegative()) |